diff options
Diffstat (limited to 'drivers/iommu')
41 files changed, 35604 insertions, 0 deletions
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig new file mode 100644 index 000000000..1ae4e547b --- /dev/null +++ b/drivers/iommu/Kconfig @@ -0,0 +1,355 @@ +# IOMMU_API always gets selected by whoever wants it. +config IOMMU_API + bool + +menuconfig IOMMU_SUPPORT + bool "IOMMU Hardware Support" + depends on MMU + default y + ---help--- + Say Y here if you want to compile device drivers for IO Memory + Management Units into the kernel. These devices usually allow to + remap DMA requests and/or remap interrupts from other devices on the + system. + +if IOMMU_SUPPORT + +menu "Generic IOMMU Pagetable Support" + +# Selected by the actual pagetable implementations +config IOMMU_IO_PGTABLE + bool + +config IOMMU_IO_PGTABLE_LPAE + bool "ARMv7/v8 Long Descriptor Format" + select IOMMU_IO_PGTABLE + depends on ARM || ARM64 || COMPILE_TEST + help + Enable support for the ARM long descriptor pagetable format. + This allocator supports 4K/2M/1G, 16K/32M and 64K/512M page + sizes at both stage-1 and stage-2, as well as address spaces + up to 48-bits in size. + +config IOMMU_IO_PGTABLE_LPAE_SELFTEST + bool "LPAE selftests" + depends on IOMMU_IO_PGTABLE_LPAE + help + Enable self-tests for LPAE page table allocator. This performs + a series of page-table consistency checks during boot. + + If unsure, say N here. + +endmenu + +config IOMMU_IOVA + bool + +config OF_IOMMU + def_bool y + depends on OF && IOMMU_API + +config FSL_PAMU + bool "Freescale IOMMU support" + depends on PPC32 + depends on PPC_E500MC || COMPILE_TEST + select IOMMU_API + select GENERIC_ALLOCATOR + help + Freescale PAMU support. PAMU is the IOMMU present on Freescale QorIQ platforms. + PAMU can authorize memory access, remap the memory address, and remap I/O + transaction types. + +# MSM IOMMU support +config MSM_IOMMU + bool "MSM IOMMU Support" + depends on ARM + depends on ARCH_MSM8X60 || ARCH_MSM8960 || COMPILE_TEST + depends on BROKEN + select IOMMU_API + help + Support for the IOMMUs found on certain Qualcomm SOCs. + These IOMMUs allow virtualization of the address space used by most + cores within the multimedia subsystem. + + If unsure, say N here. + +config IOMMU_PGTABLES_L2 + def_bool y + depends on MSM_IOMMU && MMU && SMP && CPU_DCACHE_DISABLE=n + +# AMD IOMMU support +config AMD_IOMMU + bool "AMD IOMMU support" + select SWIOTLB + select PCI_MSI + select PCI_ATS + select PCI_PRI + select PCI_PASID + select IOMMU_API + depends on X86_64 && PCI && ACPI + ---help--- + With this option you can enable support for AMD IOMMU hardware in + your system. An IOMMU is a hardware component which provides + remapping of DMA memory accesses from devices. With an AMD IOMMU you + can isolate the DMA memory of different devices and protect the + system from misbehaving device drivers or hardware. + + You can find out if your system has an AMD IOMMU if you look into + your BIOS for an option to enable it or if you have an IVRS ACPI + table. + +config AMD_IOMMU_STATS + bool "Export AMD IOMMU statistics to debugfs" + depends on AMD_IOMMU + select DEBUG_FS + ---help--- + This option enables code in the AMD IOMMU driver to collect various + statistics about whats happening in the driver and exports that + information to userspace via debugfs. + If unsure, say N. + +config AMD_IOMMU_V2 + tristate "AMD IOMMU Version 2 driver" + depends on AMD_IOMMU + select MMU_NOTIFIER + ---help--- + This option enables support for the AMD IOMMUv2 features of the IOMMU + hardware. Select this option if you want to use devices that support + the PCI PRI and PASID interface. + +# Intel IOMMU support +config DMAR_TABLE + bool + +config INTEL_IOMMU + bool "Support for Intel IOMMU using DMA Remapping Devices" + depends on PCI_MSI && ACPI && (X86 || IA64_GENERIC) + select IOMMU_API + select IOMMU_IOVA + select DMAR_TABLE + help + DMA remapping (DMAR) devices support enables independent address + translations for Direct Memory Access (DMA) from devices. + These DMA remapping devices are reported via ACPI tables + and include PCI device scope covered by these DMA + remapping devices. + +config INTEL_IOMMU_DEFAULT_ON + def_bool y + prompt "Enable Intel DMA Remapping Devices by default" + depends on INTEL_IOMMU + help + Selecting this option will enable a DMAR device at boot time if + one is found. If this option is not selected, DMAR support can + be enabled by passing intel_iommu=on to the kernel. + +config INTEL_IOMMU_BROKEN_GFX_WA + bool "Workaround broken graphics drivers (going away soon)" + depends on INTEL_IOMMU && BROKEN && X86 + ---help--- + Current Graphics drivers tend to use physical address + for DMA and avoid using DMA APIs. Setting this config + option permits the IOMMU driver to set a unity map for + all the OS-visible memory. Hence the driver can continue + to use physical addresses for DMA, at least until this + option is removed in the 2.6.32 kernel. + +config INTEL_IOMMU_FLOPPY_WA + def_bool y + depends on INTEL_IOMMU && X86 + ---help--- + Floppy disk drivers are known to bypass DMA API calls + thereby failing to work when IOMMU is enabled. This + workaround will setup a 1:1 mapping for the first + 16MiB to make floppy (an ISA device) work. + +config IRQ_REMAP + bool "Support for Interrupt Remapping" + depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI + select DMAR_TABLE + ---help--- + Supports Interrupt remapping for IO-APIC and MSI devices. + To use x2apic mode in the CPU's which support x2APIC enhancements or + to support platforms with CPU's having > 8 bit APIC ID, say Y. + +# OMAP IOMMU support +config OMAP_IOMMU + bool "OMAP IOMMU Support" + depends on ARM && MMU + depends on ARCH_OMAP2PLUS || COMPILE_TEST + select IOMMU_API + +config OMAP_IOMMU_DEBUG + bool "Export OMAP IOMMU internals in DebugFS" + depends on OMAP_IOMMU && DEBUG_FS + ---help--- + Select this to see extensive information about + the internal state of OMAP IOMMU in debugfs. + + Say N unless you know you need this. + +config ROCKCHIP_IOMMU + bool "Rockchip IOMMU Support" + depends on ARM + depends on ARCH_ROCKCHIP || COMPILE_TEST + select IOMMU_API + select ARM_DMA_USE_IOMMU + help + Support for IOMMUs found on Rockchip rk32xx SOCs. + These IOMMUs allow virtualization of the address space used by most + cores within the multimedia subsystem. + Say Y here if you are using a Rockchip SoC that includes an IOMMU + device. + +config TEGRA_IOMMU_GART + bool "Tegra GART IOMMU Support" + depends on ARCH_TEGRA_2x_SOC + select IOMMU_API + help + Enables support for remapping discontiguous physical memory + shared with the operating system into contiguous I/O virtual + space through the GART (Graphics Address Relocation Table) + hardware included on Tegra SoCs. + +config TEGRA_IOMMU_SMMU + bool "NVIDIA Tegra SMMU Support" + depends on ARCH_TEGRA + depends on TEGRA_AHB + depends on TEGRA_MC + select IOMMU_API + help + This driver supports the IOMMU hardware (SMMU) found on NVIDIA Tegra + SoCs (Tegra30 up to Tegra124). + +config EXYNOS_IOMMU + bool "Exynos IOMMU Support" + depends on ARCH_EXYNOS && ARM && MMU + select IOMMU_API + select ARM_DMA_USE_IOMMU + help + Support for the IOMMU (System MMU) of Samsung Exynos application + processor family. This enables H/W multimedia accelerators to see + non-linear physical memory chunks as linear memory in their + address space. + + If unsure, say N here. + +config EXYNOS_IOMMU_DEBUG + bool "Debugging log for Exynos IOMMU" + depends on EXYNOS_IOMMU + help + Select this to see the detailed log message that shows what + happens in the IOMMU driver. + + Say N unless you need kernel log message for IOMMU debugging. + +config SHMOBILE_IPMMU + bool + +config SHMOBILE_IPMMU_TLB + bool + +config SHMOBILE_IOMMU + bool "IOMMU for Renesas IPMMU/IPMMUI" + default n + depends on ARM && MMU + depends on ARCH_SHMOBILE || COMPILE_TEST + select IOMMU_API + select ARM_DMA_USE_IOMMU + select SHMOBILE_IPMMU + select SHMOBILE_IPMMU_TLB + help + Support for Renesas IPMMU/IPMMUI. This option enables + remapping of DMA memory accesses from all of the IP blocks + on the ICB. + + Warning: Drivers (including userspace drivers of UIO + devices) of the IP blocks on the ICB *must* use addresses + allocated from the IPMMU (iova) for DMA with this option + enabled. + + If unsure, say N. + +choice + prompt "IPMMU/IPMMUI address space size" + default SHMOBILE_IOMMU_ADDRSIZE_2048MB + depends on SHMOBILE_IOMMU + help + This option sets IPMMU/IPMMUI address space size by + adjusting the 1st level page table size. The page table size + is calculated as follows: + + page table size = number of page table entries * 4 bytes + number of page table entries = address space size / 1 MiB + + For example, when the address space size is 2048 MiB, the + 1st level page table size is 8192 bytes. + + config SHMOBILE_IOMMU_ADDRSIZE_2048MB + bool "2 GiB" + + config SHMOBILE_IOMMU_ADDRSIZE_1024MB + bool "1 GiB" + + config SHMOBILE_IOMMU_ADDRSIZE_512MB + bool "512 MiB" + + config SHMOBILE_IOMMU_ADDRSIZE_256MB + bool "256 MiB" + + config SHMOBILE_IOMMU_ADDRSIZE_128MB + bool "128 MiB" + + config SHMOBILE_IOMMU_ADDRSIZE_64MB + bool "64 MiB" + + config SHMOBILE_IOMMU_ADDRSIZE_32MB + bool "32 MiB" + +endchoice + +config SHMOBILE_IOMMU_L1SIZE + int + default 8192 if SHMOBILE_IOMMU_ADDRSIZE_2048MB + default 4096 if SHMOBILE_IOMMU_ADDRSIZE_1024MB + default 2048 if SHMOBILE_IOMMU_ADDRSIZE_512MB + default 1024 if SHMOBILE_IOMMU_ADDRSIZE_256MB + default 512 if SHMOBILE_IOMMU_ADDRSIZE_128MB + default 256 if SHMOBILE_IOMMU_ADDRSIZE_64MB + default 128 if SHMOBILE_IOMMU_ADDRSIZE_32MB + +config IPMMU_VMSA + bool "Renesas VMSA-compatible IPMMU" + depends on ARM_LPAE + depends on ARCH_SHMOBILE || COMPILE_TEST + select IOMMU_API + select IOMMU_IO_PGTABLE_LPAE + select ARM_DMA_USE_IOMMU + help + Support for the Renesas VMSA-compatible IPMMU Renesas found in the + R-Mobile APE6 and R-Car H2/M2 SoCs. + + If unsure, say N. + +config SPAPR_TCE_IOMMU + bool "sPAPR TCE IOMMU Support" + depends on PPC_POWERNV || PPC_PSERIES + select IOMMU_API + help + Enables bits of IOMMU API required by VFIO. The iommu_ops + is not implemented as it is not necessary for VFIO. + +config ARM_SMMU + bool "ARM Ltd. System MMU (SMMU) Support" + depends on (ARM64 || ARM) && MMU + select IOMMU_API + select IOMMU_IO_PGTABLE_LPAE + select ARM_DMA_USE_IOMMU if ARM + help + Support for implementations of the ARM System MMU architecture + versions 1 and 2. + + Say Y here if your SoC includes an IOMMU device implementing + the ARM SMMU architecture. + +endif # IOMMU_SUPPORT diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile new file mode 100644 index 000000000..080ffab4e --- /dev/null +++ b/drivers/iommu/Makefile @@ -0,0 +1,24 @@ +obj-$(CONFIG_IOMMU_API) += iommu.o +obj-$(CONFIG_IOMMU_API) += iommu-traces.o +obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o +obj-$(CONFIG_IOMMU_IO_PGTABLE) += io-pgtable.o +obj-$(CONFIG_IOMMU_IO_PGTABLE_LPAE) += io-pgtable-arm.o +obj-$(CONFIG_IOMMU_IOVA) += iova.o +obj-$(CONFIG_OF_IOMMU) += of_iommu.o +obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o msm_iommu_dev.o +obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o +obj-$(CONFIG_AMD_IOMMU_V2) += amd_iommu_v2.o +obj-$(CONFIG_ARM_SMMU) += arm-smmu.o +obj-$(CONFIG_DMAR_TABLE) += dmar.o +obj-$(CONFIG_INTEL_IOMMU) += intel-iommu.o +obj-$(CONFIG_IPMMU_VMSA) += ipmmu-vmsa.o +obj-$(CONFIG_IRQ_REMAP) += intel_irq_remapping.o irq_remapping.o +obj-$(CONFIG_OMAP_IOMMU) += omap-iommu.o +obj-$(CONFIG_OMAP_IOMMU_DEBUG) += omap-iommu-debug.o +obj-$(CONFIG_ROCKCHIP_IOMMU) += rockchip-iommu.o +obj-$(CONFIG_TEGRA_IOMMU_GART) += tegra-gart.o +obj-$(CONFIG_TEGRA_IOMMU_SMMU) += tegra-smmu.o +obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o +obj-$(CONFIG_SHMOBILE_IOMMU) += shmobile-iommu.o +obj-$(CONFIG_SHMOBILE_IPMMU) += shmobile-ipmmu.o +obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c new file mode 100644 index 000000000..ca9f4edbb --- /dev/null +++ b/drivers/iommu/amd_iommu.c @@ -0,0 +1,4301 @@ +/* + * Copyright (C) 2007-2010 Advanced Micro Devices, Inc. + * Author: Joerg Roedel <jroedel@suse.de> + * Leo Duran <leo.duran@amd.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/ratelimit.h> +#include <linux/pci.h> +#include <linux/pci-ats.h> +#include <linux/bitmap.h> +#include <linux/slab.h> +#include <linux/debugfs.h> +#include <linux/scatterlist.h> +#include <linux/dma-mapping.h> +#include <linux/iommu-helper.h> +#include <linux/iommu.h> +#include <linux/delay.h> +#include <linux/amd-iommu.h> +#include <linux/notifier.h> +#include <linux/export.h> +#include <linux/irq.h> +#include <linux/msi.h> +#include <linux/dma-contiguous.h> +#include <asm/irq_remapping.h> +#include <asm/io_apic.h> +#include <asm/apic.h> +#include <asm/hw_irq.h> +#include <asm/msidef.h> +#include <asm/proto.h> +#include <asm/iommu.h> +#include <asm/gart.h> +#include <asm/dma.h> + +#include "amd_iommu_proto.h" +#include "amd_iommu_types.h" +#include "irq_remapping.h" + +#define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) + +#define LOOP_TIMEOUT 100000 + +/* + * This bitmap is used to advertise the page sizes our hardware support + * to the IOMMU core, which will then use this information to split + * physically contiguous memory regions it is mapping into page sizes + * that we support. + * + * 512GB Pages are not supported due to a hardware bug + */ +#define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38)) + +static DEFINE_RWLOCK(amd_iommu_devtable_lock); + +/* A list of preallocated protection domains */ +static LIST_HEAD(iommu_pd_list); +static DEFINE_SPINLOCK(iommu_pd_list_lock); + +/* List of all available dev_data structures */ +static LIST_HEAD(dev_data_list); +static DEFINE_SPINLOCK(dev_data_list_lock); + +LIST_HEAD(ioapic_map); +LIST_HEAD(hpet_map); + +/* + * Domain for untranslated devices - only allocated + * if iommu=pt passed on kernel cmd line. + */ +static struct protection_domain *pt_domain; + +static const struct iommu_ops amd_iommu_ops; + +static ATOMIC_NOTIFIER_HEAD(ppr_notifier); +int amd_iommu_max_glx_val = -1; + +static struct dma_map_ops amd_iommu_dma_ops; + +/* + * This struct contains device specific data for the IOMMU + */ +struct iommu_dev_data { + struct list_head list; /* For domain->dev_list */ + struct list_head dev_data_list; /* For global dev_data_list */ + struct list_head alias_list; /* Link alias-groups together */ + struct iommu_dev_data *alias_data;/* The alias dev_data */ + struct protection_domain *domain; /* Domain the device is bound to */ + u16 devid; /* PCI Device ID */ + bool iommu_v2; /* Device can make use of IOMMUv2 */ + bool passthrough; /* Default for device is pt_domain */ + struct { + bool enabled; + int qdep; + } ats; /* ATS state */ + bool pri_tlp; /* PASID TLB required for + PPR completions */ + u32 errata; /* Bitmap for errata to apply */ +}; + +/* + * general struct to manage commands send to an IOMMU + */ +struct iommu_cmd { + u32 data[4]; +}; + +struct kmem_cache *amd_iommu_irq_cache; + +static void update_domain(struct protection_domain *domain); +static int __init alloc_passthrough_domain(void); + +/**************************************************************************** + * + * Helper functions + * + ****************************************************************************/ + +static struct protection_domain *to_pdomain(struct iommu_domain *dom) +{ + return container_of(dom, struct protection_domain, domain); +} + +static struct iommu_dev_data *alloc_dev_data(u16 devid) +{ + struct iommu_dev_data *dev_data; + unsigned long flags; + + dev_data = kzalloc(sizeof(*dev_data), GFP_KERNEL); + if (!dev_data) + return NULL; + + INIT_LIST_HEAD(&dev_data->alias_list); + + dev_data->devid = devid; + + spin_lock_irqsave(&dev_data_list_lock, flags); + list_add_tail(&dev_data->dev_data_list, &dev_data_list); + spin_unlock_irqrestore(&dev_data_list_lock, flags); + + return dev_data; +} + +static void free_dev_data(struct iommu_dev_data *dev_data) +{ + unsigned long flags; + + spin_lock_irqsave(&dev_data_list_lock, flags); + list_del(&dev_data->dev_data_list); + spin_unlock_irqrestore(&dev_data_list_lock, flags); + + kfree(dev_data); +} + +static struct iommu_dev_data *search_dev_data(u16 devid) +{ + struct iommu_dev_data *dev_data; + unsigned long flags; + + spin_lock_irqsave(&dev_data_list_lock, flags); + list_for_each_entry(dev_data, &dev_data_list, dev_data_list) { + if (dev_data->devid == devid) + goto out_unlock; + } + + dev_data = NULL; + +out_unlock: + spin_unlock_irqrestore(&dev_data_list_lock, flags); + + return dev_data; +} + +static struct iommu_dev_data *find_dev_data(u16 devid) +{ + struct iommu_dev_data *dev_data; + + dev_data = search_dev_data(devid); + + if (dev_data == NULL) + dev_data = alloc_dev_data(devid); + + return dev_data; +} + +static inline u16 get_device_id(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + + return PCI_DEVID(pdev->bus->number, pdev->devfn); +} + +static struct iommu_dev_data *get_dev_data(struct device *dev) +{ + return dev->archdata.iommu; +} + +static bool pci_iommuv2_capable(struct pci_dev *pdev) +{ + static const int caps[] = { + PCI_EXT_CAP_ID_ATS, + PCI_EXT_CAP_ID_PRI, + PCI_EXT_CAP_ID_PASID, + }; + int i, pos; + + for (i = 0; i < 3; ++i) { + pos = pci_find_ext_capability(pdev, caps[i]); + if (pos == 0) + return false; + } + + return true; +} + +static bool pdev_pri_erratum(struct pci_dev *pdev, u32 erratum) +{ + struct iommu_dev_data *dev_data; + + dev_data = get_dev_data(&pdev->dev); + + return dev_data->errata & (1 << erratum) ? true : false; +} + +/* + * In this function the list of preallocated protection domains is traversed to + * find the domain for a specific device + */ +static struct dma_ops_domain *find_protection_domain(u16 devid) +{ + struct dma_ops_domain *entry, *ret = NULL; + unsigned long flags; + u16 alias = amd_iommu_alias_table[devid]; + + if (list_empty(&iommu_pd_list)) + return NULL; + + spin_lock_irqsave(&iommu_pd_list_lock, flags); + + list_for_each_entry(entry, &iommu_pd_list, list) { + if (entry->target_dev == devid || + entry->target_dev == alias) { + ret = entry; + break; + } + } + + spin_unlock_irqrestore(&iommu_pd_list_lock, flags); + + return ret; +} + +/* + * This function checks if the driver got a valid device from the caller to + * avoid dereferencing invalid pointers. + */ +static bool check_device(struct device *dev) +{ + u16 devid; + + if (!dev || !dev->dma_mask) + return false; + + /* No PCI device */ + if (!dev_is_pci(dev)) + return false; + + devid = get_device_id(dev); + + /* Out of our scope? */ + if (devid > amd_iommu_last_bdf) + return false; + + if (amd_iommu_rlookup_table[devid] == NULL) + return false; + + return true; +} + +static void init_iommu_group(struct device *dev) +{ + struct iommu_group *group; + + group = iommu_group_get_for_dev(dev); + if (!IS_ERR(group)) + iommu_group_put(group); +} + +static int __last_alias(struct pci_dev *pdev, u16 alias, void *data) +{ + *(u16 *)data = alias; + return 0; +} + +static u16 get_alias(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + u16 devid, ivrs_alias, pci_alias; + + devid = get_device_id(dev); + ivrs_alias = amd_iommu_alias_table[devid]; + pci_for_each_dma_alias(pdev, __last_alias, &pci_alias); + + if (ivrs_alias == pci_alias) + return ivrs_alias; + + /* + * DMA alias showdown + * + * The IVRS is fairly reliable in telling us about aliases, but it + * can't know about every screwy device. If we don't have an IVRS + * reported alias, use the PCI reported alias. In that case we may + * still need to initialize the rlookup and dev_table entries if the + * alias is to a non-existent device. + */ + if (ivrs_alias == devid) { + if (!amd_iommu_rlookup_table[pci_alias]) { + amd_iommu_rlookup_table[pci_alias] = + amd_iommu_rlookup_table[devid]; + memcpy(amd_iommu_dev_table[pci_alias].data, + amd_iommu_dev_table[devid].data, + sizeof(amd_iommu_dev_table[pci_alias].data)); + } + + return pci_alias; + } + + pr_info("AMD-Vi: Using IVRS reported alias %02x:%02x.%d " + "for device %s[%04x:%04x], kernel reported alias " + "%02x:%02x.%d\n", PCI_BUS_NUM(ivrs_alias), PCI_SLOT(ivrs_alias), + PCI_FUNC(ivrs_alias), dev_name(dev), pdev->vendor, pdev->device, + PCI_BUS_NUM(pci_alias), PCI_SLOT(pci_alias), + PCI_FUNC(pci_alias)); + + /* + * If we don't have a PCI DMA alias and the IVRS alias is on the same + * bus, then the IVRS table may know about a quirk that we don't. + */ + if (pci_alias == devid && + PCI_BUS_NUM(ivrs_alias) == pdev->bus->number) { + pdev->dev_flags |= PCI_DEV_FLAGS_DMA_ALIAS_DEVFN; + pdev->dma_alias_devfn = ivrs_alias & 0xff; + pr_info("AMD-Vi: Added PCI DMA alias %02x.%d for %s\n", + PCI_SLOT(ivrs_alias), PCI_FUNC(ivrs_alias), + dev_name(dev)); + } + + return ivrs_alias; +} + +static int iommu_init_device(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct iommu_dev_data *dev_data; + u16 alias; + + if (dev->archdata.iommu) + return 0; + + dev_data = find_dev_data(get_device_id(dev)); + if (!dev_data) + return -ENOMEM; + + alias = get_alias(dev); + + if (alias != dev_data->devid) { + struct iommu_dev_data *alias_data; + + alias_data = find_dev_data(alias); + if (alias_data == NULL) { + pr_err("AMD-Vi: Warning: Unhandled device %s\n", + dev_name(dev)); + free_dev_data(dev_data); + return -ENOTSUPP; + } + dev_data->alias_data = alias_data; + + /* Add device to the alias_list */ + list_add(&dev_data->alias_list, &alias_data->alias_list); + } + + if (pci_iommuv2_capable(pdev)) { + struct amd_iommu *iommu; + + iommu = amd_iommu_rlookup_table[dev_data->devid]; + dev_data->iommu_v2 = iommu->is_iommu_v2; + } + + dev->archdata.iommu = dev_data; + + iommu_device_link(amd_iommu_rlookup_table[dev_data->devid]->iommu_dev, + dev); + + return 0; +} + +static void iommu_ignore_device(struct device *dev) +{ + u16 devid, alias; + + devid = get_device_id(dev); + alias = amd_iommu_alias_table[devid]; + + memset(&amd_iommu_dev_table[devid], 0, sizeof(struct dev_table_entry)); + memset(&amd_iommu_dev_table[alias], 0, sizeof(struct dev_table_entry)); + + amd_iommu_rlookup_table[devid] = NULL; + amd_iommu_rlookup_table[alias] = NULL; +} + +static void iommu_uninit_device(struct device *dev) +{ + struct iommu_dev_data *dev_data = search_dev_data(get_device_id(dev)); + + if (!dev_data) + return; + + iommu_device_unlink(amd_iommu_rlookup_table[dev_data->devid]->iommu_dev, + dev); + + iommu_group_remove_device(dev); + + /* Unlink from alias, it may change if another device is re-plugged */ + dev_data->alias_data = NULL; + + /* + * We keep dev_data around for unplugged devices and reuse it when the + * device is re-plugged - not doing so would introduce a ton of races. + */ +} + +void __init amd_iommu_uninit_devices(void) +{ + struct iommu_dev_data *dev_data, *n; + struct pci_dev *pdev = NULL; + + for_each_pci_dev(pdev) { + + if (!check_device(&pdev->dev)) + continue; + + iommu_uninit_device(&pdev->dev); + } + + /* Free all of our dev_data structures */ + list_for_each_entry_safe(dev_data, n, &dev_data_list, dev_data_list) + free_dev_data(dev_data); +} + +int __init amd_iommu_init_devices(void) +{ + struct pci_dev *pdev = NULL; + int ret = 0; + + for_each_pci_dev(pdev) { + + if (!check_device(&pdev->dev)) + continue; + + ret = iommu_init_device(&pdev->dev); + if (ret == -ENOTSUPP) + iommu_ignore_device(&pdev->dev); + else if (ret) + goto out_free; + } + + /* + * Initialize IOMMU groups only after iommu_init_device() has + * had a chance to populate any IVRS defined aliases. + */ + for_each_pci_dev(pdev) { + if (check_device(&pdev->dev)) + init_iommu_group(&pdev->dev); + } + + return 0; + +out_free: + + amd_iommu_uninit_devices(); + + return ret; +} +#ifdef CONFIG_AMD_IOMMU_STATS + +/* + * Initialization code for statistics collection + */ + +DECLARE_STATS_COUNTER(compl_wait); +DECLARE_STATS_COUNTER(cnt_map_single); +DECLARE_STATS_COUNTER(cnt_unmap_single); +DECLARE_STATS_COUNTER(cnt_map_sg); +DECLARE_STATS_COUNTER(cnt_unmap_sg); +DECLARE_STATS_COUNTER(cnt_alloc_coherent); +DECLARE_STATS_COUNTER(cnt_free_coherent); +DECLARE_STATS_COUNTER(cross_page); +DECLARE_STATS_COUNTER(domain_flush_single); +DECLARE_STATS_COUNTER(domain_flush_all); +DECLARE_STATS_COUNTER(alloced_io_mem); +DECLARE_STATS_COUNTER(total_map_requests); +DECLARE_STATS_COUNTER(complete_ppr); +DECLARE_STATS_COUNTER(invalidate_iotlb); +DECLARE_STATS_COUNTER(invalidate_iotlb_all); +DECLARE_STATS_COUNTER(pri_requests); + +static struct dentry *stats_dir; +static struct dentry *de_fflush; + +static void amd_iommu_stats_add(struct __iommu_counter *cnt) +{ + if (stats_dir == NULL) + return; + + cnt->dent = debugfs_create_u64(cnt->name, 0444, stats_dir, + &cnt->value); +} + +static void amd_iommu_stats_init(void) +{ + stats_dir = debugfs_create_dir("amd-iommu", NULL); + if (stats_dir == NULL) + return; + + de_fflush = debugfs_create_bool("fullflush", 0444, stats_dir, + &amd_iommu_unmap_flush); + + amd_iommu_stats_add(&compl_wait); + amd_iommu_stats_add(&cnt_map_single); + amd_iommu_stats_add(&cnt_unmap_single); + amd_iommu_stats_add(&cnt_map_sg); + amd_iommu_stats_add(&cnt_unmap_sg); + amd_iommu_stats_add(&cnt_alloc_coherent); + amd_iommu_stats_add(&cnt_free_coherent); + amd_iommu_stats_add(&cross_page); + amd_iommu_stats_add(&domain_flush_single); + amd_iommu_stats_add(&domain_flush_all); + amd_iommu_stats_add(&alloced_io_mem); + amd_iommu_stats_add(&total_map_requests); + amd_iommu_stats_add(&complete_ppr); + amd_iommu_stats_add(&invalidate_iotlb); + amd_iommu_stats_add(&invalidate_iotlb_all); + amd_iommu_stats_add(&pri_requests); +} + +#endif + +/**************************************************************************** + * + * Interrupt handling functions + * + ****************************************************************************/ + +static void dump_dte_entry(u16 devid) +{ + int i; + + for (i = 0; i < 4; ++i) + pr_err("AMD-Vi: DTE[%d]: %016llx\n", i, + amd_iommu_dev_table[devid].data[i]); +} + +static void dump_command(unsigned long phys_addr) +{ + struct iommu_cmd *cmd = phys_to_virt(phys_addr); + int i; + + for (i = 0; i < 4; ++i) + pr_err("AMD-Vi: CMD[%d]: %08x\n", i, cmd->data[i]); +} + +static void iommu_print_event(struct amd_iommu *iommu, void *__evt) +{ + int type, devid, domid, flags; + volatile u32 *event = __evt; + int count = 0; + u64 address; + +retry: + type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK; + devid = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK; + domid = (event[1] >> EVENT_DOMID_SHIFT) & EVENT_DOMID_MASK; + flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK; + address = (u64)(((u64)event[3]) << 32) | event[2]; + + if (type == 0) { + /* Did we hit the erratum? */ + if (++count == LOOP_TIMEOUT) { + pr_err("AMD-Vi: No event written to event log\n"); + return; + } + udelay(1); + goto retry; + } + + printk(KERN_ERR "AMD-Vi: Event logged ["); + + switch (type) { + case EVENT_TYPE_ILL_DEV: + printk("ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x " + "address=0x%016llx flags=0x%04x]\n", + PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), + address, flags); + dump_dte_entry(devid); + break; + case EVENT_TYPE_IO_FAULT: + printk("IO_PAGE_FAULT device=%02x:%02x.%x " + "domain=0x%04x address=0x%016llx flags=0x%04x]\n", + PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), + domid, address, flags); + break; + case EVENT_TYPE_DEV_TAB_ERR: + printk("DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x " + "address=0x%016llx flags=0x%04x]\n", + PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), + address, flags); + break; + case EVENT_TYPE_PAGE_TAB_ERR: + printk("PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x " + "domain=0x%04x address=0x%016llx flags=0x%04x]\n", + PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), + domid, address, flags); + break; + case EVENT_TYPE_ILL_CMD: + printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address); + dump_command(address); + break; + case EVENT_TYPE_CMD_HARD_ERR: + printk("COMMAND_HARDWARE_ERROR address=0x%016llx " + "flags=0x%04x]\n", address, flags); + break; + case EVENT_TYPE_IOTLB_INV_TO: + printk("IOTLB_INV_TIMEOUT device=%02x:%02x.%x " + "address=0x%016llx]\n", + PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), + address); + break; + case EVENT_TYPE_INV_DEV_REQ: + printk("INVALID_DEVICE_REQUEST device=%02x:%02x.%x " + "address=0x%016llx flags=0x%04x]\n", + PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), + address, flags); + break; + default: + printk(KERN_ERR "UNKNOWN type=0x%02x]\n", type); + } + + memset(__evt, 0, 4 * sizeof(u32)); +} + +static void iommu_poll_events(struct amd_iommu *iommu) +{ + u32 head, tail; + + head = readl(iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); + tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET); + + while (head != tail) { + iommu_print_event(iommu, iommu->evt_buf + head); + head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size; + } + + writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); +} + +static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u64 *raw) +{ + struct amd_iommu_fault fault; + + INC_STATS_COUNTER(pri_requests); + + if (PPR_REQ_TYPE(raw[0]) != PPR_REQ_FAULT) { + pr_err_ratelimited("AMD-Vi: Unknown PPR request received\n"); + return; + } + + fault.address = raw[1]; + fault.pasid = PPR_PASID(raw[0]); + fault.device_id = PPR_DEVID(raw[0]); + fault.tag = PPR_TAG(raw[0]); + fault.flags = PPR_FLAGS(raw[0]); + + atomic_notifier_call_chain(&ppr_notifier, 0, &fault); +} + +static void iommu_poll_ppr_log(struct amd_iommu *iommu) +{ + u32 head, tail; + + if (iommu->ppr_log == NULL) + return; + + head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); + tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET); + + while (head != tail) { + volatile u64 *raw; + u64 entry[2]; + int i; + + raw = (u64 *)(iommu->ppr_log + head); + + /* + * Hardware bug: Interrupt may arrive before the entry is + * written to memory. If this happens we need to wait for the + * entry to arrive. + */ + for (i = 0; i < LOOP_TIMEOUT; ++i) { + if (PPR_REQ_TYPE(raw[0]) != 0) + break; + udelay(1); + } + + /* Avoid memcpy function-call overhead */ + entry[0] = raw[0]; + entry[1] = raw[1]; + + /* + * To detect the hardware bug we need to clear the entry + * back to zero. + */ + raw[0] = raw[1] = 0UL; + + /* Update head pointer of hardware ring-buffer */ + head = (head + PPR_ENTRY_SIZE) % PPR_LOG_SIZE; + writel(head, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); + + /* Handle PPR entry */ + iommu_handle_ppr_entry(iommu, entry); + + /* Refresh ring-buffer information */ + head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); + tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET); + } +} + +irqreturn_t amd_iommu_int_thread(int irq, void *data) +{ + struct amd_iommu *iommu = (struct amd_iommu *) data; + u32 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); + + while (status & (MMIO_STATUS_EVT_INT_MASK | MMIO_STATUS_PPR_INT_MASK)) { + /* Enable EVT and PPR interrupts again */ + writel((MMIO_STATUS_EVT_INT_MASK | MMIO_STATUS_PPR_INT_MASK), + iommu->mmio_base + MMIO_STATUS_OFFSET); + + if (status & MMIO_STATUS_EVT_INT_MASK) { + pr_devel("AMD-Vi: Processing IOMMU Event Log\n"); + iommu_poll_events(iommu); + } + + if (status & MMIO_STATUS_PPR_INT_MASK) { + pr_devel("AMD-Vi: Processing IOMMU PPR Log\n"); + iommu_poll_ppr_log(iommu); + } + + /* + * Hardware bug: ERBT1312 + * When re-enabling interrupt (by writing 1 + * to clear the bit), the hardware might also try to set + * the interrupt bit in the event status register. + * In this scenario, the bit will be set, and disable + * subsequent interrupts. + * + * Workaround: The IOMMU driver should read back the + * status register and check if the interrupt bits are cleared. + * If not, driver will need to go through the interrupt handler + * again and re-clear the bits + */ + status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); + } + return IRQ_HANDLED; +} + +irqreturn_t amd_iommu_int_handler(int irq, void *data) +{ + return IRQ_WAKE_THREAD; +} + +/**************************************************************************** + * + * IOMMU command queuing functions + * + ****************************************************************************/ + +static int wait_on_sem(volatile u64 *sem) +{ + int i = 0; + + while (*sem == 0 && i < LOOP_TIMEOUT) { + udelay(1); + i += 1; + } + + if (i == LOOP_TIMEOUT) { + pr_alert("AMD-Vi: Completion-Wait loop timed out\n"); + return -EIO; + } + + return 0; +} + +static void copy_cmd_to_buffer(struct amd_iommu *iommu, + struct iommu_cmd *cmd, + u32 tail) +{ + u8 *target; + + target = iommu->cmd_buf + tail; + tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size; + + /* Copy command to buffer */ + memcpy(target, cmd, sizeof(*cmd)); + + /* Tell the IOMMU about it */ + writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); +} + +static void build_completion_wait(struct iommu_cmd *cmd, u64 address) +{ + WARN_ON(address & 0x7ULL); + + memset(cmd, 0, sizeof(*cmd)); + cmd->data[0] = lower_32_bits(__pa(address)) | CMD_COMPL_WAIT_STORE_MASK; + cmd->data[1] = upper_32_bits(__pa(address)); + cmd->data[2] = 1; + CMD_SET_TYPE(cmd, CMD_COMPL_WAIT); +} + +static void build_inv_dte(struct iommu_cmd *cmd, u16 devid) +{ + memset(cmd, 0, sizeof(*cmd)); + cmd->data[0] = devid; + CMD_SET_TYPE(cmd, CMD_INV_DEV_ENTRY); +} + +static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, + size_t size, u16 domid, int pde) +{ + u64 pages; + bool s; + + pages = iommu_num_pages(address, size, PAGE_SIZE); + s = false; + + if (pages > 1) { + /* + * If we have to flush more than one page, flush all + * TLB entries for this domain + */ + address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; + s = true; + } + + address &= PAGE_MASK; + + memset(cmd, 0, sizeof(*cmd)); + cmd->data[1] |= domid; + cmd->data[2] = lower_32_bits(address); + cmd->data[3] = upper_32_bits(address); + CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES); + if (s) /* size bit - we flush more than one 4kb page */ + cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; + if (pde) /* PDE bit - we want to flush everything, not only the PTEs */ + cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; +} + +static void build_inv_iotlb_pages(struct iommu_cmd *cmd, u16 devid, int qdep, + u64 address, size_t size) +{ + u64 pages; + bool s; + + pages = iommu_num_pages(address, size, PAGE_SIZE); + s = false; + + if (pages > 1) { + /* + * If we have to flush more than one page, flush all + * TLB entries for this domain + */ + address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; + s = true; + } + + address &= PAGE_MASK; + + memset(cmd, 0, sizeof(*cmd)); + cmd->data[0] = devid; + cmd->data[0] |= (qdep & 0xff) << 24; + cmd->data[1] = devid; + cmd->data[2] = lower_32_bits(address); + cmd->data[3] = upper_32_bits(address); + CMD_SET_TYPE(cmd, CMD_INV_IOTLB_PAGES); + if (s) + cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; +} + +static void build_inv_iommu_pasid(struct iommu_cmd *cmd, u16 domid, int pasid, + u64 address, bool size) +{ + memset(cmd, 0, sizeof(*cmd)); + + address &= ~(0xfffULL); + + cmd->data[0] = pasid; + cmd->data[1] = domid; + cmd->data[2] = lower_32_bits(address); + cmd->data[3] = upper_32_bits(address); + cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; + cmd->data[2] |= CMD_INV_IOMMU_PAGES_GN_MASK; + if (size) + cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; + CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES); +} + +static void build_inv_iotlb_pasid(struct iommu_cmd *cmd, u16 devid, int pasid, + int qdep, u64 address, bool size) +{ + memset(cmd, 0, sizeof(*cmd)); + + address &= ~(0xfffULL); + + cmd->data[0] = devid; + cmd->data[0] |= ((pasid >> 8) & 0xff) << 16; + cmd->data[0] |= (qdep & 0xff) << 24; + cmd->data[1] = devid; + cmd->data[1] |= (pasid & 0xff) << 16; + cmd->data[2] = lower_32_bits(address); + cmd->data[2] |= CMD_INV_IOMMU_PAGES_GN_MASK; + cmd->data[3] = upper_32_bits(address); + if (size) + cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; + CMD_SET_TYPE(cmd, CMD_INV_IOTLB_PAGES); +} + +static void build_complete_ppr(struct iommu_cmd *cmd, u16 devid, int pasid, + int status, int tag, bool gn) +{ + memset(cmd, 0, sizeof(*cmd)); + + cmd->data[0] = devid; + if (gn) { + cmd->data[1] = pasid; + cmd->data[2] = CMD_INV_IOMMU_PAGES_GN_MASK; + } + cmd->data[3] = tag & 0x1ff; + cmd->data[3] |= (status & PPR_STATUS_MASK) << PPR_STATUS_SHIFT; + + CMD_SET_TYPE(cmd, CMD_COMPLETE_PPR); +} + +static void build_inv_all(struct iommu_cmd *cmd) +{ + memset(cmd, 0, sizeof(*cmd)); + CMD_SET_TYPE(cmd, CMD_INV_ALL); +} + +static void build_inv_irt(struct iommu_cmd *cmd, u16 devid) +{ + memset(cmd, 0, sizeof(*cmd)); + cmd->data[0] = devid; + CMD_SET_TYPE(cmd, CMD_INV_IRT); +} + +/* + * Writes the command to the IOMMUs command buffer and informs the + * hardware about the new command. + */ +static int iommu_queue_command_sync(struct amd_iommu *iommu, + struct iommu_cmd *cmd, + bool sync) +{ + u32 left, tail, head, next_tail; + unsigned long flags; + + WARN_ON(iommu->cmd_buf_size & CMD_BUFFER_UNINITIALIZED); + +again: + spin_lock_irqsave(&iommu->lock, flags); + + head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); + tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); + next_tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size; + left = (head - next_tail) % iommu->cmd_buf_size; + + if (left <= 2) { + struct iommu_cmd sync_cmd; + volatile u64 sem = 0; + int ret; + + build_completion_wait(&sync_cmd, (u64)&sem); + copy_cmd_to_buffer(iommu, &sync_cmd, tail); + + spin_unlock_irqrestore(&iommu->lock, flags); + + if ((ret = wait_on_sem(&sem)) != 0) + return ret; + + goto again; + } + + copy_cmd_to_buffer(iommu, cmd, tail); + + /* We need to sync now to make sure all commands are processed */ + iommu->need_sync = sync; + + spin_unlock_irqrestore(&iommu->lock, flags); + + return 0; +} + +static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) +{ + return iommu_queue_command_sync(iommu, cmd, true); +} + +/* + * This function queues a completion wait command into the command + * buffer of an IOMMU + */ +static int iommu_completion_wait(struct amd_iommu *iommu) +{ + struct iommu_cmd cmd; + volatile u64 sem = 0; + int ret; + + if (!iommu->need_sync) + return 0; + + build_completion_wait(&cmd, (u64)&sem); + + ret = iommu_queue_command_sync(iommu, &cmd, false); + if (ret) + return ret; + + return wait_on_sem(&sem); +} + +static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid) +{ + struct iommu_cmd cmd; + + build_inv_dte(&cmd, devid); + + return iommu_queue_command(iommu, &cmd); +} + +static void iommu_flush_dte_all(struct amd_iommu *iommu) +{ + u32 devid; + + for (devid = 0; devid <= 0xffff; ++devid) + iommu_flush_dte(iommu, devid); + + iommu_completion_wait(iommu); +} + +/* + * This function uses heavy locking and may disable irqs for some time. But + * this is no issue because it is only called during resume. + */ +static void iommu_flush_tlb_all(struct amd_iommu *iommu) +{ + u32 dom_id; + + for (dom_id = 0; dom_id <= 0xffff; ++dom_id) { + struct iommu_cmd cmd; + build_inv_iommu_pages(&cmd, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, + dom_id, 1); + iommu_queue_command(iommu, &cmd); + } + + iommu_completion_wait(iommu); +} + +static void iommu_flush_all(struct amd_iommu *iommu) +{ + struct iommu_cmd cmd; + + build_inv_all(&cmd); + + iommu_queue_command(iommu, &cmd); + iommu_completion_wait(iommu); +} + +static void iommu_flush_irt(struct amd_iommu *iommu, u16 devid) +{ + struct iommu_cmd cmd; + + build_inv_irt(&cmd, devid); + + iommu_queue_command(iommu, &cmd); +} + +static void iommu_flush_irt_all(struct amd_iommu *iommu) +{ + u32 devid; + + for (devid = 0; devid <= MAX_DEV_TABLE_ENTRIES; devid++) + iommu_flush_irt(iommu, devid); + + iommu_completion_wait(iommu); +} + +void iommu_flush_all_caches(struct amd_iommu *iommu) +{ + if (iommu_feature(iommu, FEATURE_IA)) { + iommu_flush_all(iommu); + } else { + iommu_flush_dte_all(iommu); + iommu_flush_irt_all(iommu); + iommu_flush_tlb_all(iommu); + } +} + +/* + * Command send function for flushing on-device TLB + */ +static int device_flush_iotlb(struct iommu_dev_data *dev_data, + u64 address, size_t size) +{ + struct amd_iommu *iommu; + struct iommu_cmd cmd; + int qdep; + + qdep = dev_data->ats.qdep; + iommu = amd_iommu_rlookup_table[dev_data->devid]; + + build_inv_iotlb_pages(&cmd, dev_data->devid, qdep, address, size); + + return iommu_queue_command(iommu, &cmd); +} + +/* + * Command send function for invalidating a device table entry + */ +static int device_flush_dte(struct iommu_dev_data *dev_data) +{ + struct amd_iommu *iommu; + int ret; + + iommu = amd_iommu_rlookup_table[dev_data->devid]; + + ret = iommu_flush_dte(iommu, dev_data->devid); + if (ret) + return ret; + + if (dev_data->ats.enabled) + ret = device_flush_iotlb(dev_data, 0, ~0UL); + + return ret; +} + +/* + * TLB invalidation function which is called from the mapping functions. + * It invalidates a single PTE if the range to flush is within a single + * page. Otherwise it flushes the whole TLB of the IOMMU. + */ +static void __domain_flush_pages(struct protection_domain *domain, + u64 address, size_t size, int pde) +{ + struct iommu_dev_data *dev_data; + struct iommu_cmd cmd; + int ret = 0, i; + + build_inv_iommu_pages(&cmd, address, size, domain->id, pde); + + for (i = 0; i < amd_iommus_present; ++i) { + if (!domain->dev_iommu[i]) + continue; + + /* + * Devices of this domain are behind this IOMMU + * We need a TLB flush + */ + ret |= iommu_queue_command(amd_iommus[i], &cmd); + } + + list_for_each_entry(dev_data, &domain->dev_list, list) { + + if (!dev_data->ats.enabled) + continue; + + ret |= device_flush_iotlb(dev_data, address, size); + } + + WARN_ON(ret); +} + +static void domain_flush_pages(struct protection_domain *domain, + u64 address, size_t size) +{ + __domain_flush_pages(domain, address, size, 0); +} + +/* Flush the whole IO/TLB for a given protection domain */ +static void domain_flush_tlb(struct protection_domain *domain) +{ + __domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 0); +} + +/* Flush the whole IO/TLB for a given protection domain - including PDE */ +static void domain_flush_tlb_pde(struct protection_domain *domain) +{ + __domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1); +} + +static void domain_flush_complete(struct protection_domain *domain) +{ + int i; + + for (i = 0; i < amd_iommus_present; ++i) { + if (!domain->dev_iommu[i]) + continue; + + /* + * Devices of this domain are behind this IOMMU + * We need to wait for completion of all commands. + */ + iommu_completion_wait(amd_iommus[i]); + } +} + + +/* + * This function flushes the DTEs for all devices in domain + */ +static void domain_flush_devices(struct protection_domain *domain) +{ + struct iommu_dev_data *dev_data; + + list_for_each_entry(dev_data, &domain->dev_list, list) + device_flush_dte(dev_data); +} + +/**************************************************************************** + * + * The functions below are used the create the page table mappings for + * unity mapped regions. + * + ****************************************************************************/ + +/* + * This function is used to add another level to an IO page table. Adding + * another level increases the size of the address space by 9 bits to a size up + * to 64 bits. + */ +static bool increase_address_space(struct protection_domain *domain, + gfp_t gfp) +{ + u64 *pte; + + if (domain->mode == PAGE_MODE_6_LEVEL) + /* address space already 64 bit large */ + return false; + + pte = (void *)get_zeroed_page(gfp); + if (!pte) + return false; + + *pte = PM_LEVEL_PDE(domain->mode, + virt_to_phys(domain->pt_root)); + domain->pt_root = pte; + domain->mode += 1; + domain->updated = true; + + return true; +} + +static u64 *alloc_pte(struct protection_domain *domain, + unsigned long address, + unsigned long page_size, + u64 **pte_page, + gfp_t gfp) +{ + int level, end_lvl; + u64 *pte, *page; + + BUG_ON(!is_power_of_2(page_size)); + + while (address > PM_LEVEL_SIZE(domain->mode)) + increase_address_space(domain, gfp); + + level = domain->mode - 1; + pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; + address = PAGE_SIZE_ALIGN(address, page_size); + end_lvl = PAGE_SIZE_LEVEL(page_size); + + while (level > end_lvl) { + if (!IOMMU_PTE_PRESENT(*pte)) { + page = (u64 *)get_zeroed_page(gfp); + if (!page) + return NULL; + *pte = PM_LEVEL_PDE(level, virt_to_phys(page)); + } + + /* No level skipping support yet */ + if (PM_PTE_LEVEL(*pte) != level) + return NULL; + + level -= 1; + + pte = IOMMU_PTE_PAGE(*pte); + + if (pte_page && level == end_lvl) + *pte_page = pte; + + pte = &pte[PM_LEVEL_INDEX(level, address)]; + } + + return pte; +} + +/* + * This function checks if there is a PTE for a given dma address. If + * there is one, it returns the pointer to it. + */ +static u64 *fetch_pte(struct protection_domain *domain, + unsigned long address, + unsigned long *page_size) +{ + int level; + u64 *pte; + + if (address > PM_LEVEL_SIZE(domain->mode)) + return NULL; + + level = domain->mode - 1; + pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; + *page_size = PTE_LEVEL_PAGE_SIZE(level); + + while (level > 0) { + + /* Not Present */ + if (!IOMMU_PTE_PRESENT(*pte)) + return NULL; + + /* Large PTE */ + if (PM_PTE_LEVEL(*pte) == 7 || + PM_PTE_LEVEL(*pte) == 0) + break; + + /* No level skipping support yet */ + if (PM_PTE_LEVEL(*pte) != level) + return NULL; + + level -= 1; + + /* Walk to the next level */ + pte = IOMMU_PTE_PAGE(*pte); + pte = &pte[PM_LEVEL_INDEX(level, address)]; + *page_size = PTE_LEVEL_PAGE_SIZE(level); + } + + if (PM_PTE_LEVEL(*pte) == 0x07) { + unsigned long pte_mask; + + /* + * If we have a series of large PTEs, make + * sure to return a pointer to the first one. + */ + *page_size = pte_mask = PTE_PAGE_SIZE(*pte); + pte_mask = ~((PAGE_SIZE_PTE_COUNT(pte_mask) << 3) - 1); + pte = (u64 *)(((unsigned long)pte) & pte_mask); + } + + return pte; +} + +/* + * Generic mapping functions. It maps a physical address into a DMA + * address space. It allocates the page table pages if necessary. + * In the future it can be extended to a generic mapping function + * supporting all features of AMD IOMMU page tables like level skipping + * and full 64 bit address spaces. + */ +static int iommu_map_page(struct protection_domain *dom, + unsigned long bus_addr, + unsigned long phys_addr, + int prot, + unsigned long page_size) +{ + u64 __pte, *pte; + int i, count; + + BUG_ON(!IS_ALIGNED(bus_addr, page_size)); + BUG_ON(!IS_ALIGNED(phys_addr, page_size)); + + if (!(prot & IOMMU_PROT_MASK)) + return -EINVAL; + + count = PAGE_SIZE_PTE_COUNT(page_size); + pte = alloc_pte(dom, bus_addr, page_size, NULL, GFP_KERNEL); + + if (!pte) + return -ENOMEM; + + for (i = 0; i < count; ++i) + if (IOMMU_PTE_PRESENT(pte[i])) + return -EBUSY; + + if (count > 1) { + __pte = PAGE_SIZE_PTE(phys_addr, page_size); + __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_P | IOMMU_PTE_FC; + } else + __pte = phys_addr | IOMMU_PTE_P | IOMMU_PTE_FC; + + if (prot & IOMMU_PROT_IR) + __pte |= IOMMU_PTE_IR; + if (prot & IOMMU_PROT_IW) + __pte |= IOMMU_PTE_IW; + + for (i = 0; i < count; ++i) + pte[i] = __pte; + + update_domain(dom); + + return 0; +} + +static unsigned long iommu_unmap_page(struct protection_domain *dom, + unsigned long bus_addr, + unsigned long page_size) +{ + unsigned long long unmapped; + unsigned long unmap_size; + u64 *pte; + + BUG_ON(!is_power_of_2(page_size)); + + unmapped = 0; + + while (unmapped < page_size) { + + pte = fetch_pte(dom, bus_addr, &unmap_size); + + if (pte) { + int i, count; + + count = PAGE_SIZE_PTE_COUNT(unmap_size); + for (i = 0; i < count; i++) + pte[i] = 0ULL; + } + + bus_addr = (bus_addr & ~(unmap_size - 1)) + unmap_size; + unmapped += unmap_size; + } + + BUG_ON(unmapped && !is_power_of_2(unmapped)); + + return unmapped; +} + +/* + * This function checks if a specific unity mapping entry is needed for + * this specific IOMMU. + */ +static int iommu_for_unity_map(struct amd_iommu *iommu, + struct unity_map_entry *entry) +{ + u16 bdf, i; + + for (i = entry->devid_start; i <= entry->devid_end; ++i) { + bdf = amd_iommu_alias_table[i]; + if (amd_iommu_rlookup_table[bdf] == iommu) + return 1; + } + + return 0; +} + +/* + * This function actually applies the mapping to the page table of the + * dma_ops domain. + */ +static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, + struct unity_map_entry *e) +{ + u64 addr; + int ret; + + for (addr = e->address_start; addr < e->address_end; + addr += PAGE_SIZE) { + ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot, + PAGE_SIZE); + if (ret) + return ret; + /* + * if unity mapping is in aperture range mark the page + * as allocated in the aperture + */ + if (addr < dma_dom->aperture_size) + __set_bit(addr >> PAGE_SHIFT, + dma_dom->aperture[0]->bitmap); + } + + return 0; +} + +/* + * Init the unity mappings for a specific IOMMU in the system + * + * Basically iterates over all unity mapping entries and applies them to + * the default domain DMA of that IOMMU if necessary. + */ +static int iommu_init_unity_mappings(struct amd_iommu *iommu) +{ + struct unity_map_entry *entry; + int ret; + + list_for_each_entry(entry, &amd_iommu_unity_map, list) { + if (!iommu_for_unity_map(iommu, entry)) + continue; + ret = dma_ops_unity_map(iommu->default_dom, entry); + if (ret) + return ret; + } + + return 0; +} + +/* + * Inits the unity mappings required for a specific device + */ +static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, + u16 devid) +{ + struct unity_map_entry *e; + int ret; + + list_for_each_entry(e, &amd_iommu_unity_map, list) { + if (!(devid >= e->devid_start && devid <= e->devid_end)) + continue; + ret = dma_ops_unity_map(dma_dom, e); + if (ret) + return ret; + } + + return 0; +} + +/**************************************************************************** + * + * The next functions belong to the address allocator for the dma_ops + * interface functions. They work like the allocators in the other IOMMU + * drivers. Its basically a bitmap which marks the allocated pages in + * the aperture. Maybe it could be enhanced in the future to a more + * efficient allocator. + * + ****************************************************************************/ + +/* + * The address allocator core functions. + * + * called with domain->lock held + */ + +/* + * Used to reserve address ranges in the aperture (e.g. for exclusion + * ranges. + */ +static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, + unsigned long start_page, + unsigned int pages) +{ + unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT; + + if (start_page + pages > last_page) + pages = last_page - start_page; + + for (i = start_page; i < start_page + pages; ++i) { + int index = i / APERTURE_RANGE_PAGES; + int page = i % APERTURE_RANGE_PAGES; + __set_bit(page, dom->aperture[index]->bitmap); + } +} + +/* + * This function is used to add a new aperture range to an existing + * aperture in case of dma_ops domain allocation or address allocation + * failure. + */ +static int alloc_new_range(struct dma_ops_domain *dma_dom, + bool populate, gfp_t gfp) +{ + int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT; + struct amd_iommu *iommu; + unsigned long i, old_size, pte_pgsize; + +#ifdef CONFIG_IOMMU_STRESS + populate = false; +#endif + + if (index >= APERTURE_MAX_RANGES) + return -ENOMEM; + + dma_dom->aperture[index] = kzalloc(sizeof(struct aperture_range), gfp); + if (!dma_dom->aperture[index]) + return -ENOMEM; + + dma_dom->aperture[index]->bitmap = (void *)get_zeroed_page(gfp); + if (!dma_dom->aperture[index]->bitmap) + goto out_free; + + dma_dom->aperture[index]->offset = dma_dom->aperture_size; + + if (populate) { + unsigned long address = dma_dom->aperture_size; + int i, num_ptes = APERTURE_RANGE_PAGES / 512; + u64 *pte, *pte_page; + + for (i = 0; i < num_ptes; ++i) { + pte = alloc_pte(&dma_dom->domain, address, PAGE_SIZE, + &pte_page, gfp); + if (!pte) + goto out_free; + + dma_dom->aperture[index]->pte_pages[i] = pte_page; + + address += APERTURE_RANGE_SIZE / 64; + } + } + + old_size = dma_dom->aperture_size; + dma_dom->aperture_size += APERTURE_RANGE_SIZE; + + /* Reserve address range used for MSI messages */ + if (old_size < MSI_ADDR_BASE_LO && + dma_dom->aperture_size > MSI_ADDR_BASE_LO) { + unsigned long spage; + int pages; + + pages = iommu_num_pages(MSI_ADDR_BASE_LO, 0x10000, PAGE_SIZE); + spage = MSI_ADDR_BASE_LO >> PAGE_SHIFT; + + dma_ops_reserve_addresses(dma_dom, spage, pages); + } + + /* Initialize the exclusion range if necessary */ + for_each_iommu(iommu) { + if (iommu->exclusion_start && + iommu->exclusion_start >= dma_dom->aperture[index]->offset + && iommu->exclusion_start < dma_dom->aperture_size) { + unsigned long startpage; + int pages = iommu_num_pages(iommu->exclusion_start, + iommu->exclusion_length, + PAGE_SIZE); + startpage = iommu->exclusion_start >> PAGE_SHIFT; + dma_ops_reserve_addresses(dma_dom, startpage, pages); + } + } + + /* + * Check for areas already mapped as present in the new aperture + * range and mark those pages as reserved in the allocator. Such + * mappings may already exist as a result of requested unity + * mappings for devices. + */ + for (i = dma_dom->aperture[index]->offset; + i < dma_dom->aperture_size; + i += pte_pgsize) { + u64 *pte = fetch_pte(&dma_dom->domain, i, &pte_pgsize); + if (!pte || !IOMMU_PTE_PRESENT(*pte)) + continue; + + dma_ops_reserve_addresses(dma_dom, i >> PAGE_SHIFT, + pte_pgsize >> 12); + } + + update_domain(&dma_dom->domain); + + return 0; + +out_free: + update_domain(&dma_dom->domain); + + free_page((unsigned long)dma_dom->aperture[index]->bitmap); + + kfree(dma_dom->aperture[index]); + dma_dom->aperture[index] = NULL; + + return -ENOMEM; +} + +static unsigned long dma_ops_area_alloc(struct device *dev, + struct dma_ops_domain *dom, + unsigned int pages, + unsigned long align_mask, + u64 dma_mask, + unsigned long start) +{ + unsigned long next_bit = dom->next_address % APERTURE_RANGE_SIZE; + int max_index = dom->aperture_size >> APERTURE_RANGE_SHIFT; + int i = start >> APERTURE_RANGE_SHIFT; + unsigned long boundary_size; + unsigned long address = -1; + unsigned long limit; + + next_bit >>= PAGE_SHIFT; + + boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, + PAGE_SIZE) >> PAGE_SHIFT; + + for (;i < max_index; ++i) { + unsigned long offset = dom->aperture[i]->offset >> PAGE_SHIFT; + + if (dom->aperture[i]->offset >= dma_mask) + break; + + limit = iommu_device_max_index(APERTURE_RANGE_PAGES, offset, + dma_mask >> PAGE_SHIFT); + + address = iommu_area_alloc(dom->aperture[i]->bitmap, + limit, next_bit, pages, 0, + boundary_size, align_mask); + if (address != -1) { + address = dom->aperture[i]->offset + + (address << PAGE_SHIFT); + dom->next_address = address + (pages << PAGE_SHIFT); + break; + } + + next_bit = 0; + } + + return address; +} + +static unsigned long dma_ops_alloc_addresses(struct device *dev, + struct dma_ops_domain *dom, + unsigned int pages, + unsigned long align_mask, + u64 dma_mask) +{ + unsigned long address; + +#ifdef CONFIG_IOMMU_STRESS + dom->next_address = 0; + dom->need_flush = true; +#endif + + address = dma_ops_area_alloc(dev, dom, pages, align_mask, + dma_mask, dom->next_address); + + if (address == -1) { + dom->next_address = 0; + address = dma_ops_area_alloc(dev, dom, pages, align_mask, + dma_mask, 0); + dom->need_flush = true; + } + + if (unlikely(address == -1)) + address = DMA_ERROR_CODE; + + WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size); + + return address; +} + +/* + * The address free function. + * + * called with domain->lock held + */ +static void dma_ops_free_addresses(struct dma_ops_domain *dom, + unsigned long address, + unsigned int pages) +{ + unsigned i = address >> APERTURE_RANGE_SHIFT; + struct aperture_range *range = dom->aperture[i]; + + BUG_ON(i >= APERTURE_MAX_RANGES || range == NULL); + +#ifdef CONFIG_IOMMU_STRESS + if (i < 4) + return; +#endif + + if (address >= dom->next_address) + dom->need_flush = true; + + address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT; + + bitmap_clear(range->bitmap, address, pages); + +} + +/**************************************************************************** + * + * The next functions belong to the domain allocation. A domain is + * allocated for every IOMMU as the default domain. If device isolation + * is enabled, every device get its own domain. The most important thing + * about domains is the page table mapping the DMA address space they + * contain. + * + ****************************************************************************/ + +/* + * This function adds a protection domain to the global protection domain list + */ +static void add_domain_to_list(struct protection_domain *domain) +{ + unsigned long flags; + + spin_lock_irqsave(&amd_iommu_pd_lock, flags); + list_add(&domain->list, &amd_iommu_pd_list); + spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); +} + +/* + * This function removes a protection domain to the global + * protection domain list + */ +static void del_domain_from_list(struct protection_domain *domain) +{ + unsigned long flags; + + spin_lock_irqsave(&amd_iommu_pd_lock, flags); + list_del(&domain->list); + spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); +} + +static u16 domain_id_alloc(void) +{ + unsigned long flags; + int id; + + write_lock_irqsave(&amd_iommu_devtable_lock, flags); + id = find_first_zero_bit(amd_iommu_pd_alloc_bitmap, MAX_DOMAIN_ID); + BUG_ON(id == 0); + if (id > 0 && id < MAX_DOMAIN_ID) + __set_bit(id, amd_iommu_pd_alloc_bitmap); + else + id = 0; + write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); + + return id; +} + +static void domain_id_free(int id) +{ + unsigned long flags; + + write_lock_irqsave(&amd_iommu_devtable_lock, flags); + if (id > 0 && id < MAX_DOMAIN_ID) + __clear_bit(id, amd_iommu_pd_alloc_bitmap); + write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); +} + +#define DEFINE_FREE_PT_FN(LVL, FN) \ +static void free_pt_##LVL (unsigned long __pt) \ +{ \ + unsigned long p; \ + u64 *pt; \ + int i; \ + \ + pt = (u64 *)__pt; \ + \ + for (i = 0; i < 512; ++i) { \ + /* PTE present? */ \ + if (!IOMMU_PTE_PRESENT(pt[i])) \ + continue; \ + \ + /* Large PTE? */ \ + if (PM_PTE_LEVEL(pt[i]) == 0 || \ + PM_PTE_LEVEL(pt[i]) == 7) \ + continue; \ + \ + p = (unsigned long)IOMMU_PTE_PAGE(pt[i]); \ + FN(p); \ + } \ + free_page((unsigned long)pt); \ +} + +DEFINE_FREE_PT_FN(l2, free_page) +DEFINE_FREE_PT_FN(l3, free_pt_l2) +DEFINE_FREE_PT_FN(l4, free_pt_l3) +DEFINE_FREE_PT_FN(l5, free_pt_l4) +DEFINE_FREE_PT_FN(l6, free_pt_l5) + +static void free_pagetable(struct protection_domain *domain) +{ + unsigned long root = (unsigned long)domain->pt_root; + + switch (domain->mode) { + case PAGE_MODE_NONE: + break; + case PAGE_MODE_1_LEVEL: + free_page(root); + break; + case PAGE_MODE_2_LEVEL: + free_pt_l2(root); + break; + case PAGE_MODE_3_LEVEL: + free_pt_l3(root); + break; + case PAGE_MODE_4_LEVEL: + free_pt_l4(root); + break; + case PAGE_MODE_5_LEVEL: + free_pt_l5(root); + break; + case PAGE_MODE_6_LEVEL: + free_pt_l6(root); + break; + default: + BUG(); + } +} + +static void free_gcr3_tbl_level1(u64 *tbl) +{ + u64 *ptr; + int i; + + for (i = 0; i < 512; ++i) { + if (!(tbl[i] & GCR3_VALID)) + continue; + + ptr = __va(tbl[i] & PAGE_MASK); + + free_page((unsigned long)ptr); + } +} + +static void free_gcr3_tbl_level2(u64 *tbl) +{ + u64 *ptr; + int i; + + for (i = 0; i < 512; ++i) { + if (!(tbl[i] & GCR3_VALID)) + continue; + + ptr = __va(tbl[i] & PAGE_MASK); + + free_gcr3_tbl_level1(ptr); + } +} + +static void free_gcr3_table(struct protection_domain *domain) +{ + if (domain->glx == 2) + free_gcr3_tbl_level2(domain->gcr3_tbl); + else if (domain->glx == 1) + free_gcr3_tbl_level1(domain->gcr3_tbl); + else if (domain->glx != 0) + BUG(); + + free_page((unsigned long)domain->gcr3_tbl); +} + +/* + * Free a domain, only used if something went wrong in the + * allocation path and we need to free an already allocated page table + */ +static void dma_ops_domain_free(struct dma_ops_domain *dom) +{ + int i; + + if (!dom) + return; + + del_domain_from_list(&dom->domain); + + free_pagetable(&dom->domain); + + for (i = 0; i < APERTURE_MAX_RANGES; ++i) { + if (!dom->aperture[i]) + continue; + free_page((unsigned long)dom->aperture[i]->bitmap); + kfree(dom->aperture[i]); + } + + kfree(dom); +} + +/* + * Allocates a new protection domain usable for the dma_ops functions. + * It also initializes the page table and the address allocator data + * structures required for the dma_ops interface + */ +static struct dma_ops_domain *dma_ops_domain_alloc(void) +{ + struct dma_ops_domain *dma_dom; + + dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL); + if (!dma_dom) + return NULL; + + spin_lock_init(&dma_dom->domain.lock); + + dma_dom->domain.id = domain_id_alloc(); + if (dma_dom->domain.id == 0) + goto free_dma_dom; + INIT_LIST_HEAD(&dma_dom->domain.dev_list); + dma_dom->domain.mode = PAGE_MODE_2_LEVEL; + dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); + dma_dom->domain.flags = PD_DMA_OPS_MASK; + dma_dom->domain.priv = dma_dom; + if (!dma_dom->domain.pt_root) + goto free_dma_dom; + + dma_dom->need_flush = false; + dma_dom->target_dev = 0xffff; + + add_domain_to_list(&dma_dom->domain); + + if (alloc_new_range(dma_dom, true, GFP_KERNEL)) + goto free_dma_dom; + + /* + * mark the first page as allocated so we never return 0 as + * a valid dma-address. So we can use 0 as error value + */ + dma_dom->aperture[0]->bitmap[0] = 1; + dma_dom->next_address = 0; + + + return dma_dom; + +free_dma_dom: + dma_ops_domain_free(dma_dom); + + return NULL; +} + +/* + * little helper function to check whether a given protection domain is a + * dma_ops domain + */ +static bool dma_ops_domain(struct protection_domain *domain) +{ + return domain->flags & PD_DMA_OPS_MASK; +} + +static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats) +{ + u64 pte_root = 0; + u64 flags = 0; + + if (domain->mode != PAGE_MODE_NONE) + pte_root = virt_to_phys(domain->pt_root); + + pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) + << DEV_ENTRY_MODE_SHIFT; + pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; + + flags = amd_iommu_dev_table[devid].data[1]; + + if (ats) + flags |= DTE_FLAG_IOTLB; + + if (domain->flags & PD_IOMMUV2_MASK) { + u64 gcr3 = __pa(domain->gcr3_tbl); + u64 glx = domain->glx; + u64 tmp; + + pte_root |= DTE_FLAG_GV; + pte_root |= (glx & DTE_GLX_MASK) << DTE_GLX_SHIFT; + + /* First mask out possible old values for GCR3 table */ + tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B; + flags &= ~tmp; + + tmp = DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C; + flags &= ~tmp; + + /* Encode GCR3 table into DTE */ + tmp = DTE_GCR3_VAL_A(gcr3) << DTE_GCR3_SHIFT_A; + pte_root |= tmp; + + tmp = DTE_GCR3_VAL_B(gcr3) << DTE_GCR3_SHIFT_B; + flags |= tmp; + + tmp = DTE_GCR3_VAL_C(gcr3) << DTE_GCR3_SHIFT_C; + flags |= tmp; + } + + flags &= ~(0xffffUL); + flags |= domain->id; + + amd_iommu_dev_table[devid].data[1] = flags; + amd_iommu_dev_table[devid].data[0] = pte_root; +} + +static void clear_dte_entry(u16 devid) +{ + /* remove entry from the device table seen by the hardware */ + amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV; + amd_iommu_dev_table[devid].data[1] = 0; + + amd_iommu_apply_erratum_63(devid); +} + +static void do_attach(struct iommu_dev_data *dev_data, + struct protection_domain *domain) +{ + struct amd_iommu *iommu; + bool ats; + + iommu = amd_iommu_rlookup_table[dev_data->devid]; + ats = dev_data->ats.enabled; + + /* Update data structures */ + dev_data->domain = domain; + list_add(&dev_data->list, &domain->dev_list); + set_dte_entry(dev_data->devid, domain, ats); + + /* Do reference counting */ + domain->dev_iommu[iommu->index] += 1; + domain->dev_cnt += 1; + + /* Flush the DTE entry */ + device_flush_dte(dev_data); +} + +static void do_detach(struct iommu_dev_data *dev_data) +{ + struct amd_iommu *iommu; + + iommu = amd_iommu_rlookup_table[dev_data->devid]; + + /* decrease reference counters */ + dev_data->domain->dev_iommu[iommu->index] -= 1; + dev_data->domain->dev_cnt -= 1; + + /* Update data structures */ + dev_data->domain = NULL; + list_del(&dev_data->list); + clear_dte_entry(dev_data->devid); + + /* Flush the DTE entry */ + device_flush_dte(dev_data); +} + +/* + * If a device is not yet associated with a domain, this function does + * assigns it visible for the hardware + */ +static int __attach_device(struct iommu_dev_data *dev_data, + struct protection_domain *domain) +{ + struct iommu_dev_data *head, *entry; + int ret; + + /* lock domain */ + spin_lock(&domain->lock); + + head = dev_data; + + if (head->alias_data != NULL) + head = head->alias_data; + + /* Now we have the root of the alias group, if any */ + + ret = -EBUSY; + if (head->domain != NULL) + goto out_unlock; + + /* Attach alias group root */ + do_attach(head, domain); + + /* Attach other devices in the alias group */ + list_for_each_entry(entry, &head->alias_list, alias_list) + do_attach(entry, domain); + + ret = 0; + +out_unlock: + + /* ready */ + spin_unlock(&domain->lock); + + return ret; +} + + +static void pdev_iommuv2_disable(struct pci_dev *pdev) +{ + pci_disable_ats(pdev); + pci_disable_pri(pdev); + pci_disable_pasid(pdev); +} + +/* FIXME: Change generic reset-function to do the same */ +static int pri_reset_while_enabled(struct pci_dev *pdev) +{ + u16 control; + int pos; + + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); + if (!pos) + return -EINVAL; + + pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control); + control |= PCI_PRI_CTRL_RESET; + pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control); + + return 0; +} + +static int pdev_iommuv2_enable(struct pci_dev *pdev) +{ + bool reset_enable; + int reqs, ret; + + /* FIXME: Hardcode number of outstanding requests for now */ + reqs = 32; + if (pdev_pri_erratum(pdev, AMD_PRI_DEV_ERRATUM_LIMIT_REQ_ONE)) + reqs = 1; + reset_enable = pdev_pri_erratum(pdev, AMD_PRI_DEV_ERRATUM_ENABLE_RESET); + + /* Only allow access to user-accessible pages */ + ret = pci_enable_pasid(pdev, 0); + if (ret) + goto out_err; + + /* First reset the PRI state of the device */ + ret = pci_reset_pri(pdev); + if (ret) + goto out_err; + + /* Enable PRI */ + ret = pci_enable_pri(pdev, reqs); + if (ret) + goto out_err; + + if (reset_enable) { + ret = pri_reset_while_enabled(pdev); + if (ret) + goto out_err; + } + + ret = pci_enable_ats(pdev, PAGE_SHIFT); + if (ret) + goto out_err; + + return 0; + +out_err: + pci_disable_pri(pdev); + pci_disable_pasid(pdev); + + return ret; +} + +/* FIXME: Move this to PCI code */ +#define PCI_PRI_TLP_OFF (1 << 15) + +static bool pci_pri_tlp_required(struct pci_dev *pdev) +{ + u16 status; + int pos; + + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); + if (!pos) + return false; + + pci_read_config_word(pdev, pos + PCI_PRI_STATUS, &status); + + return (status & PCI_PRI_TLP_OFF) ? true : false; +} + +/* + * If a device is not yet associated with a domain, this function + * assigns it visible for the hardware + */ +static int attach_device(struct device *dev, + struct protection_domain *domain) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct iommu_dev_data *dev_data; + unsigned long flags; + int ret; + + dev_data = get_dev_data(dev); + + if (domain->flags & PD_IOMMUV2_MASK) { + if (!dev_data->iommu_v2 || !dev_data->passthrough) + return -EINVAL; + + if (pdev_iommuv2_enable(pdev) != 0) + return -EINVAL; + + dev_data->ats.enabled = true; + dev_data->ats.qdep = pci_ats_queue_depth(pdev); + dev_data->pri_tlp = pci_pri_tlp_required(pdev); + } else if (amd_iommu_iotlb_sup && + pci_enable_ats(pdev, PAGE_SHIFT) == 0) { + dev_data->ats.enabled = true; + dev_data->ats.qdep = pci_ats_queue_depth(pdev); + } + + write_lock_irqsave(&amd_iommu_devtable_lock, flags); + ret = __attach_device(dev_data, domain); + write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); + + /* + * We might boot into a crash-kernel here. The crashed kernel + * left the caches in the IOMMU dirty. So we have to flush + * here to evict all dirty stuff. + */ + domain_flush_tlb_pde(domain); + + return ret; +} + +/* + * Removes a device from a protection domain (unlocked) + */ +static void __detach_device(struct iommu_dev_data *dev_data) +{ + struct iommu_dev_data *head, *entry; + struct protection_domain *domain; + unsigned long flags; + + BUG_ON(!dev_data->domain); + + domain = dev_data->domain; + + spin_lock_irqsave(&domain->lock, flags); + + head = dev_data; + if (head->alias_data != NULL) + head = head->alias_data; + + list_for_each_entry(entry, &head->alias_list, alias_list) + do_detach(entry); + + do_detach(head); + + spin_unlock_irqrestore(&domain->lock, flags); + + /* + * If we run in passthrough mode the device must be assigned to the + * passthrough domain if it is detached from any other domain. + * Make sure we can deassign from the pt_domain itself. + */ + if (dev_data->passthrough && + (dev_data->domain == NULL && domain != pt_domain)) + __attach_device(dev_data, pt_domain); +} + +/* + * Removes a device from a protection domain (with devtable_lock held) + */ +static void detach_device(struct device *dev) +{ + struct protection_domain *domain; + struct iommu_dev_data *dev_data; + unsigned long flags; + + dev_data = get_dev_data(dev); + domain = dev_data->domain; + + /* lock device table */ + write_lock_irqsave(&amd_iommu_devtable_lock, flags); + __detach_device(dev_data); + write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); + + if (domain->flags & PD_IOMMUV2_MASK) + pdev_iommuv2_disable(to_pci_dev(dev)); + else if (dev_data->ats.enabled) + pci_disable_ats(to_pci_dev(dev)); + + dev_data->ats.enabled = false; +} + +/* + * Find out the protection domain structure for a given PCI device. This + * will give us the pointer to the page table root for example. + */ +static struct protection_domain *domain_for_device(struct device *dev) +{ + struct iommu_dev_data *dev_data; + struct protection_domain *dom = NULL; + unsigned long flags; + + dev_data = get_dev_data(dev); + + if (dev_data->domain) + return dev_data->domain; + + if (dev_data->alias_data != NULL) { + struct iommu_dev_data *alias_data = dev_data->alias_data; + + read_lock_irqsave(&amd_iommu_devtable_lock, flags); + if (alias_data->domain != NULL) { + __attach_device(dev_data, alias_data->domain); + dom = alias_data->domain; + } + read_unlock_irqrestore(&amd_iommu_devtable_lock, flags); + } + + return dom; +} + +static int device_change_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct dma_ops_domain *dma_domain; + struct protection_domain *domain; + struct iommu_dev_data *dev_data; + struct device *dev = data; + struct amd_iommu *iommu; + unsigned long flags; + u16 devid; + + if (!check_device(dev)) + return 0; + + devid = get_device_id(dev); + iommu = amd_iommu_rlookup_table[devid]; + dev_data = get_dev_data(dev); + + switch (action) { + case BUS_NOTIFY_ADD_DEVICE: + + iommu_init_device(dev); + init_iommu_group(dev); + + /* + * dev_data is still NULL and + * got initialized in iommu_init_device + */ + dev_data = get_dev_data(dev); + + if (iommu_pass_through || dev_data->iommu_v2) { + dev_data->passthrough = true; + attach_device(dev, pt_domain); + break; + } + + domain = domain_for_device(dev); + + /* allocate a protection domain if a device is added */ + dma_domain = find_protection_domain(devid); + if (!dma_domain) { + dma_domain = dma_ops_domain_alloc(); + if (!dma_domain) + goto out; + dma_domain->target_dev = devid; + + spin_lock_irqsave(&iommu_pd_list_lock, flags); + list_add_tail(&dma_domain->list, &iommu_pd_list); + spin_unlock_irqrestore(&iommu_pd_list_lock, flags); + } + + dev->archdata.dma_ops = &amd_iommu_dma_ops; + + break; + case BUS_NOTIFY_REMOVED_DEVICE: + + iommu_uninit_device(dev); + + default: + goto out; + } + + iommu_completion_wait(iommu); + +out: + return 0; +} + +static struct notifier_block device_nb = { + .notifier_call = device_change_notifier, +}; + +void amd_iommu_init_notifier(void) +{ + bus_register_notifier(&pci_bus_type, &device_nb); +} + +/***************************************************************************** + * + * The next functions belong to the dma_ops mapping/unmapping code. + * + *****************************************************************************/ + +/* + * In the dma_ops path we only have the struct device. This function + * finds the corresponding IOMMU, the protection domain and the + * requestor id for a given device. + * If the device is not yet associated with a domain this is also done + * in this function. + */ +static struct protection_domain *get_domain(struct device *dev) +{ + struct protection_domain *domain; + struct dma_ops_domain *dma_dom; + u16 devid = get_device_id(dev); + + if (!check_device(dev)) + return ERR_PTR(-EINVAL); + + domain = domain_for_device(dev); + if (domain != NULL && !dma_ops_domain(domain)) + return ERR_PTR(-EBUSY); + + if (domain != NULL) + return domain; + + /* Device not bound yet - bind it */ + dma_dom = find_protection_domain(devid); + if (!dma_dom) + dma_dom = amd_iommu_rlookup_table[devid]->default_dom; + attach_device(dev, &dma_dom->domain); + DUMP_printk("Using protection domain %d for device %s\n", + dma_dom->domain.id, dev_name(dev)); + + return &dma_dom->domain; +} + +static void update_device_table(struct protection_domain *domain) +{ + struct iommu_dev_data *dev_data; + + list_for_each_entry(dev_data, &domain->dev_list, list) + set_dte_entry(dev_data->devid, domain, dev_data->ats.enabled); +} + +static void update_domain(struct protection_domain *domain) +{ + if (!domain->updated) + return; + + update_device_table(domain); + + domain_flush_devices(domain); + domain_flush_tlb_pde(domain); + + domain->updated = false; +} + +/* + * This function fetches the PTE for a given address in the aperture + */ +static u64* dma_ops_get_pte(struct dma_ops_domain *dom, + unsigned long address) +{ + struct aperture_range *aperture; + u64 *pte, *pte_page; + + aperture = dom->aperture[APERTURE_RANGE_INDEX(address)]; + if (!aperture) + return NULL; + + pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)]; + if (!pte) { + pte = alloc_pte(&dom->domain, address, PAGE_SIZE, &pte_page, + GFP_ATOMIC); + aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page; + } else + pte += PM_LEVEL_INDEX(0, address); + + update_domain(&dom->domain); + + return pte; +} + +/* + * This is the generic map function. It maps one 4kb page at paddr to + * the given address in the DMA address space for the domain. + */ +static dma_addr_t dma_ops_domain_map(struct dma_ops_domain *dom, + unsigned long address, + phys_addr_t paddr, + int direction) +{ + u64 *pte, __pte; + + WARN_ON(address > dom->aperture_size); + + paddr &= PAGE_MASK; + + pte = dma_ops_get_pte(dom, address); + if (!pte) + return DMA_ERROR_CODE; + + __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC; + + if (direction == DMA_TO_DEVICE) + __pte |= IOMMU_PTE_IR; + else if (direction == DMA_FROM_DEVICE) + __pte |= IOMMU_PTE_IW; + else if (direction == DMA_BIDIRECTIONAL) + __pte |= IOMMU_PTE_IR | IOMMU_PTE_IW; + + WARN_ON(*pte); + + *pte = __pte; + + return (dma_addr_t)address; +} + +/* + * The generic unmapping function for on page in the DMA address space. + */ +static void dma_ops_domain_unmap(struct dma_ops_domain *dom, + unsigned long address) +{ + struct aperture_range *aperture; + u64 *pte; + + if (address >= dom->aperture_size) + return; + + aperture = dom->aperture[APERTURE_RANGE_INDEX(address)]; + if (!aperture) + return; + + pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)]; + if (!pte) + return; + + pte += PM_LEVEL_INDEX(0, address); + + WARN_ON(!*pte); + + *pte = 0ULL; +} + +/* + * This function contains common code for mapping of a physically + * contiguous memory region into DMA address space. It is used by all + * mapping functions provided with this IOMMU driver. + * Must be called with the domain lock held. + */ +static dma_addr_t __map_single(struct device *dev, + struct dma_ops_domain *dma_dom, + phys_addr_t paddr, + size_t size, + int dir, + bool align, + u64 dma_mask) +{ + dma_addr_t offset = paddr & ~PAGE_MASK; + dma_addr_t address, start, ret; + unsigned int pages; + unsigned long align_mask = 0; + int i; + + pages = iommu_num_pages(paddr, size, PAGE_SIZE); + paddr &= PAGE_MASK; + + INC_STATS_COUNTER(total_map_requests); + + if (pages > 1) + INC_STATS_COUNTER(cross_page); + + if (align) + align_mask = (1UL << get_order(size)) - 1; + +retry: + address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask, + dma_mask); + if (unlikely(address == DMA_ERROR_CODE)) { + /* + * setting next_address here will let the address + * allocator only scan the new allocated range in the + * first run. This is a small optimization. + */ + dma_dom->next_address = dma_dom->aperture_size; + + if (alloc_new_range(dma_dom, false, GFP_ATOMIC)) + goto out; + + /* + * aperture was successfully enlarged by 128 MB, try + * allocation again + */ + goto retry; + } + + start = address; + for (i = 0; i < pages; ++i) { + ret = dma_ops_domain_map(dma_dom, start, paddr, dir); + if (ret == DMA_ERROR_CODE) + goto out_unmap; + + paddr += PAGE_SIZE; + start += PAGE_SIZE; + } + address += offset; + + ADD_STATS_COUNTER(alloced_io_mem, size); + + if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { + domain_flush_tlb(&dma_dom->domain); + dma_dom->need_flush = false; + } else if (unlikely(amd_iommu_np_cache)) + domain_flush_pages(&dma_dom->domain, address, size); + +out: + return address; + +out_unmap: + + for (--i; i >= 0; --i) { + start -= PAGE_SIZE; + dma_ops_domain_unmap(dma_dom, start); + } + + dma_ops_free_addresses(dma_dom, address, pages); + + return DMA_ERROR_CODE; +} + +/* + * Does the reverse of the __map_single function. Must be called with + * the domain lock held too + */ +static void __unmap_single(struct dma_ops_domain *dma_dom, + dma_addr_t dma_addr, + size_t size, + int dir) +{ + dma_addr_t flush_addr; + dma_addr_t i, start; + unsigned int pages; + + if ((dma_addr == DMA_ERROR_CODE) || + (dma_addr + size > dma_dom->aperture_size)) + return; + + flush_addr = dma_addr; + pages = iommu_num_pages(dma_addr, size, PAGE_SIZE); + dma_addr &= PAGE_MASK; + start = dma_addr; + + for (i = 0; i < pages; ++i) { + dma_ops_domain_unmap(dma_dom, start); + start += PAGE_SIZE; + } + + SUB_STATS_COUNTER(alloced_io_mem, size); + + dma_ops_free_addresses(dma_dom, dma_addr, pages); + + if (amd_iommu_unmap_flush || dma_dom->need_flush) { + domain_flush_pages(&dma_dom->domain, flush_addr, size); + dma_dom->need_flush = false; + } +} + +/* + * The exported map_single function for dma_ops. + */ +static dma_addr_t map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + unsigned long flags; + struct protection_domain *domain; + dma_addr_t addr; + u64 dma_mask; + phys_addr_t paddr = page_to_phys(page) + offset; + + INC_STATS_COUNTER(cnt_map_single); + + domain = get_domain(dev); + if (PTR_ERR(domain) == -EINVAL) + return (dma_addr_t)paddr; + else if (IS_ERR(domain)) + return DMA_ERROR_CODE; + + dma_mask = *dev->dma_mask; + + spin_lock_irqsave(&domain->lock, flags); + + addr = __map_single(dev, domain->priv, paddr, size, dir, false, + dma_mask); + if (addr == DMA_ERROR_CODE) + goto out; + + domain_flush_complete(domain); + +out: + spin_unlock_irqrestore(&domain->lock, flags); + + return addr; +} + +/* + * The exported unmap_single function for dma_ops. + */ +static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, + enum dma_data_direction dir, struct dma_attrs *attrs) +{ + unsigned long flags; + struct protection_domain *domain; + + INC_STATS_COUNTER(cnt_unmap_single); + + domain = get_domain(dev); + if (IS_ERR(domain)) + return; + + spin_lock_irqsave(&domain->lock, flags); + + __unmap_single(domain->priv, dma_addr, size, dir); + + domain_flush_complete(domain); + + spin_unlock_irqrestore(&domain->lock, flags); +} + +/* + * The exported map_sg function for dma_ops (handles scatter-gather + * lists). + */ +static int map_sg(struct device *dev, struct scatterlist *sglist, + int nelems, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + unsigned long flags; + struct protection_domain *domain; + int i; + struct scatterlist *s; + phys_addr_t paddr; + int mapped_elems = 0; + u64 dma_mask; + + INC_STATS_COUNTER(cnt_map_sg); + + domain = get_domain(dev); + if (IS_ERR(domain)) + return 0; + + dma_mask = *dev->dma_mask; + + spin_lock_irqsave(&domain->lock, flags); + + for_each_sg(sglist, s, nelems, i) { + paddr = sg_phys(s); + + s->dma_address = __map_single(dev, domain->priv, + paddr, s->length, dir, false, + dma_mask); + + if (s->dma_address) { + s->dma_length = s->length; + mapped_elems++; + } else + goto unmap; + } + + domain_flush_complete(domain); + +out: + spin_unlock_irqrestore(&domain->lock, flags); + + return mapped_elems; +unmap: + for_each_sg(sglist, s, mapped_elems, i) { + if (s->dma_address) + __unmap_single(domain->priv, s->dma_address, + s->dma_length, dir); + s->dma_address = s->dma_length = 0; + } + + mapped_elems = 0; + + goto out; +} + +/* + * The exported map_sg function for dma_ops (handles scatter-gather + * lists). + */ +static void unmap_sg(struct device *dev, struct scatterlist *sglist, + int nelems, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + unsigned long flags; + struct protection_domain *domain; + struct scatterlist *s; + int i; + + INC_STATS_COUNTER(cnt_unmap_sg); + + domain = get_domain(dev); + if (IS_ERR(domain)) + return; + + spin_lock_irqsave(&domain->lock, flags); + + for_each_sg(sglist, s, nelems, i) { + __unmap_single(domain->priv, s->dma_address, + s->dma_length, dir); + s->dma_address = s->dma_length = 0; + } + + domain_flush_complete(domain); + + spin_unlock_irqrestore(&domain->lock, flags); +} + +/* + * The exported alloc_coherent function for dma_ops. + */ +static void *alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_addr, gfp_t flag, + struct dma_attrs *attrs) +{ + u64 dma_mask = dev->coherent_dma_mask; + struct protection_domain *domain; + unsigned long flags; + struct page *page; + + INC_STATS_COUNTER(cnt_alloc_coherent); + + domain = get_domain(dev); + if (PTR_ERR(domain) == -EINVAL) { + page = alloc_pages(flag, get_order(size)); + *dma_addr = page_to_phys(page); + return page_address(page); + } else if (IS_ERR(domain)) + return NULL; + + size = PAGE_ALIGN(size); + dma_mask = dev->coherent_dma_mask; + flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); + flag |= __GFP_ZERO; + + page = alloc_pages(flag | __GFP_NOWARN, get_order(size)); + if (!page) { + if (!(flag & __GFP_WAIT)) + return NULL; + + page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT, + get_order(size)); + if (!page) + return NULL; + } + + if (!dma_mask) + dma_mask = *dev->dma_mask; + + spin_lock_irqsave(&domain->lock, flags); + + *dma_addr = __map_single(dev, domain->priv, page_to_phys(page), + size, DMA_BIDIRECTIONAL, true, dma_mask); + + if (*dma_addr == DMA_ERROR_CODE) { + spin_unlock_irqrestore(&domain->lock, flags); + goto out_free; + } + + domain_flush_complete(domain); + + spin_unlock_irqrestore(&domain->lock, flags); + + return page_address(page); + +out_free: + + if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT)) + __free_pages(page, get_order(size)); + + return NULL; +} + +/* + * The exported free_coherent function for dma_ops. + */ +static void free_coherent(struct device *dev, size_t size, + void *virt_addr, dma_addr_t dma_addr, + struct dma_attrs *attrs) +{ + struct protection_domain *domain; + unsigned long flags; + struct page *page; + + INC_STATS_COUNTER(cnt_free_coherent); + + page = virt_to_page(virt_addr); + size = PAGE_ALIGN(size); + + domain = get_domain(dev); + if (IS_ERR(domain)) + goto free_mem; + + spin_lock_irqsave(&domain->lock, flags); + + __unmap_single(domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); + + domain_flush_complete(domain); + + spin_unlock_irqrestore(&domain->lock, flags); + +free_mem: + if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT)) + __free_pages(page, get_order(size)); +} + +/* + * This function is called by the DMA layer to find out if we can handle a + * particular device. It is part of the dma_ops. + */ +static int amd_iommu_dma_supported(struct device *dev, u64 mask) +{ + return check_device(dev); +} + +/* + * The function for pre-allocating protection domains. + * + * If the driver core informs the DMA layer if a driver grabs a device + * we don't need to preallocate the protection domains anymore. + * For now we have to. + */ +static void __init prealloc_protection_domains(void) +{ + struct iommu_dev_data *dev_data; + struct dma_ops_domain *dma_dom; + struct pci_dev *dev = NULL; + u16 devid; + + for_each_pci_dev(dev) { + + /* Do we handle this device? */ + if (!check_device(&dev->dev)) + continue; + + dev_data = get_dev_data(&dev->dev); + if (!amd_iommu_force_isolation && dev_data->iommu_v2) { + /* Make sure passthrough domain is allocated */ + alloc_passthrough_domain(); + dev_data->passthrough = true; + attach_device(&dev->dev, pt_domain); + pr_info("AMD-Vi: Using passthrough domain for device %s\n", + dev_name(&dev->dev)); + } + + /* Is there already any domain for it? */ + if (domain_for_device(&dev->dev)) + continue; + + devid = get_device_id(&dev->dev); + + dma_dom = dma_ops_domain_alloc(); + if (!dma_dom) + continue; + init_unity_mappings_for_device(dma_dom, devid); + dma_dom->target_dev = devid; + + attach_device(&dev->dev, &dma_dom->domain); + + list_add_tail(&dma_dom->list, &iommu_pd_list); + } +} + +static struct dma_map_ops amd_iommu_dma_ops = { + .alloc = alloc_coherent, + .free = free_coherent, + .map_page = map_page, + .unmap_page = unmap_page, + .map_sg = map_sg, + .unmap_sg = unmap_sg, + .dma_supported = amd_iommu_dma_supported, +}; + +static unsigned device_dma_ops_init(void) +{ + struct iommu_dev_data *dev_data; + struct pci_dev *pdev = NULL; + unsigned unhandled = 0; + + for_each_pci_dev(pdev) { + if (!check_device(&pdev->dev)) { + + iommu_ignore_device(&pdev->dev); + + unhandled += 1; + continue; + } + + dev_data = get_dev_data(&pdev->dev); + + if (!dev_data->passthrough) + pdev->dev.archdata.dma_ops = &amd_iommu_dma_ops; + else + pdev->dev.archdata.dma_ops = &nommu_dma_ops; + } + + return unhandled; +} + +/* + * The function which clues the AMD IOMMU driver into dma_ops. + */ + +void __init amd_iommu_init_api(void) +{ + bus_set_iommu(&pci_bus_type, &amd_iommu_ops); +} + +int __init amd_iommu_init_dma_ops(void) +{ + struct amd_iommu *iommu; + int ret, unhandled; + + /* + * first allocate a default protection domain for every IOMMU we + * found in the system. Devices not assigned to any other + * protection domain will be assigned to the default one. + */ + for_each_iommu(iommu) { + iommu->default_dom = dma_ops_domain_alloc(); + if (iommu->default_dom == NULL) + return -ENOMEM; + iommu->default_dom->domain.flags |= PD_DEFAULT_MASK; + ret = iommu_init_unity_mappings(iommu); + if (ret) + goto free_domains; + } + + /* + * Pre-allocate the protection domains for each device. + */ + prealloc_protection_domains(); + + iommu_detected = 1; + swiotlb = 0; + + /* Make the driver finally visible to the drivers */ + unhandled = device_dma_ops_init(); + if (unhandled && max_pfn > MAX_DMA32_PFN) { + /* There are unhandled devices - initialize swiotlb for them */ + swiotlb = 1; + } + + amd_iommu_stats_init(); + + if (amd_iommu_unmap_flush) + pr_info("AMD-Vi: IO/TLB flush on unmap enabled\n"); + else + pr_info("AMD-Vi: Lazy IO/TLB flushing enabled\n"); + + return 0; + +free_domains: + + for_each_iommu(iommu) { + dma_ops_domain_free(iommu->default_dom); + } + + return ret; +} + +/***************************************************************************** + * + * The following functions belong to the exported interface of AMD IOMMU + * + * This interface allows access to lower level functions of the IOMMU + * like protection domain handling and assignement of devices to domains + * which is not possible with the dma_ops interface. + * + *****************************************************************************/ + +static void cleanup_domain(struct protection_domain *domain) +{ + struct iommu_dev_data *entry; + unsigned long flags; + + write_lock_irqsave(&amd_iommu_devtable_lock, flags); + + while (!list_empty(&domain->dev_list)) { + entry = list_first_entry(&domain->dev_list, + struct iommu_dev_data, list); + __detach_device(entry); + } + + write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); +} + +static void protection_domain_free(struct protection_domain *domain) +{ + if (!domain) + return; + + del_domain_from_list(domain); + + if (domain->id) + domain_id_free(domain->id); + + kfree(domain); +} + +static struct protection_domain *protection_domain_alloc(void) +{ + struct protection_domain *domain; + + domain = kzalloc(sizeof(*domain), GFP_KERNEL); + if (!domain) + return NULL; + + spin_lock_init(&domain->lock); + mutex_init(&domain->api_lock); + domain->id = domain_id_alloc(); + if (!domain->id) + goto out_err; + INIT_LIST_HEAD(&domain->dev_list); + + add_domain_to_list(domain); + + return domain; + +out_err: + kfree(domain); + + return NULL; +} + +static int __init alloc_passthrough_domain(void) +{ + if (pt_domain != NULL) + return 0; + + /* allocate passthrough domain */ + pt_domain = protection_domain_alloc(); + if (!pt_domain) + return -ENOMEM; + + pt_domain->mode = PAGE_MODE_NONE; + + return 0; +} + +static struct iommu_domain *amd_iommu_domain_alloc(unsigned type) +{ + struct protection_domain *pdomain; + + /* We only support unmanaged domains for now */ + if (type != IOMMU_DOMAIN_UNMANAGED) + return NULL; + + pdomain = protection_domain_alloc(); + if (!pdomain) + goto out_free; + + pdomain->mode = PAGE_MODE_3_LEVEL; + pdomain->pt_root = (void *)get_zeroed_page(GFP_KERNEL); + if (!pdomain->pt_root) + goto out_free; + + pdomain->domain.geometry.aperture_start = 0; + pdomain->domain.geometry.aperture_end = ~0ULL; + pdomain->domain.geometry.force_aperture = true; + + return &pdomain->domain; + +out_free: + protection_domain_free(pdomain); + + return NULL; +} + +static void amd_iommu_domain_free(struct iommu_domain *dom) +{ + struct protection_domain *domain; + + if (!dom) + return; + + domain = to_pdomain(dom); + + if (domain->dev_cnt > 0) + cleanup_domain(domain); + + BUG_ON(domain->dev_cnt != 0); + + if (domain->mode != PAGE_MODE_NONE) + free_pagetable(domain); + + if (domain->flags & PD_IOMMUV2_MASK) + free_gcr3_table(domain); + + protection_domain_free(domain); +} + +static void amd_iommu_detach_device(struct iommu_domain *dom, + struct device *dev) +{ + struct iommu_dev_data *dev_data = dev->archdata.iommu; + struct amd_iommu *iommu; + u16 devid; + + if (!check_device(dev)) + return; + + devid = get_device_id(dev); + + if (dev_data->domain != NULL) + detach_device(dev); + + iommu = amd_iommu_rlookup_table[devid]; + if (!iommu) + return; + + iommu_completion_wait(iommu); +} + +static int amd_iommu_attach_device(struct iommu_domain *dom, + struct device *dev) +{ + struct protection_domain *domain = to_pdomain(dom); + struct iommu_dev_data *dev_data; + struct amd_iommu *iommu; + int ret; + + if (!check_device(dev)) + return -EINVAL; + + dev_data = dev->archdata.iommu; + + iommu = amd_iommu_rlookup_table[dev_data->devid]; + if (!iommu) + return -EINVAL; + + if (dev_data->domain) + detach_device(dev); + + ret = attach_device(dev, domain); + + iommu_completion_wait(iommu); + + return ret; +} + +static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova, + phys_addr_t paddr, size_t page_size, int iommu_prot) +{ + struct protection_domain *domain = to_pdomain(dom); + int prot = 0; + int ret; + + if (domain->mode == PAGE_MODE_NONE) + return -EINVAL; + + if (iommu_prot & IOMMU_READ) + prot |= IOMMU_PROT_IR; + if (iommu_prot & IOMMU_WRITE) + prot |= IOMMU_PROT_IW; + + mutex_lock(&domain->api_lock); + ret = iommu_map_page(domain, iova, paddr, prot, page_size); + mutex_unlock(&domain->api_lock); + + return ret; +} + +static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova, + size_t page_size) +{ + struct protection_domain *domain = to_pdomain(dom); + size_t unmap_size; + + if (domain->mode == PAGE_MODE_NONE) + return -EINVAL; + + mutex_lock(&domain->api_lock); + unmap_size = iommu_unmap_page(domain, iova, page_size); + mutex_unlock(&domain->api_lock); + + domain_flush_tlb_pde(domain); + + return unmap_size; +} + +static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, + dma_addr_t iova) +{ + struct protection_domain *domain = to_pdomain(dom); + unsigned long offset_mask, pte_pgsize; + u64 *pte, __pte; + + if (domain->mode == PAGE_MODE_NONE) + return iova; + + pte = fetch_pte(domain, iova, &pte_pgsize); + + if (!pte || !IOMMU_PTE_PRESENT(*pte)) + return 0; + + offset_mask = pte_pgsize - 1; + __pte = *pte & PM_ADDR_MASK; + + return (__pte & ~offset_mask) | (iova & offset_mask); +} + +static bool amd_iommu_capable(enum iommu_cap cap) +{ + switch (cap) { + case IOMMU_CAP_CACHE_COHERENCY: + return true; + case IOMMU_CAP_INTR_REMAP: + return (irq_remapping_enabled == 1); + case IOMMU_CAP_NOEXEC: + return false; + } + + return false; +} + +static const struct iommu_ops amd_iommu_ops = { + .capable = amd_iommu_capable, + .domain_alloc = amd_iommu_domain_alloc, + .domain_free = amd_iommu_domain_free, + .attach_dev = amd_iommu_attach_device, + .detach_dev = amd_iommu_detach_device, + .map = amd_iommu_map, + .unmap = amd_iommu_unmap, + .map_sg = default_iommu_map_sg, + .iova_to_phys = amd_iommu_iova_to_phys, + .pgsize_bitmap = AMD_IOMMU_PGSIZES, +}; + +/***************************************************************************** + * + * The next functions do a basic initialization of IOMMU for pass through + * mode + * + * In passthrough mode the IOMMU is initialized and enabled but not used for + * DMA-API translation. + * + *****************************************************************************/ + +int __init amd_iommu_init_passthrough(void) +{ + struct iommu_dev_data *dev_data; + struct pci_dev *dev = NULL; + int ret; + + ret = alloc_passthrough_domain(); + if (ret) + return ret; + + for_each_pci_dev(dev) { + if (!check_device(&dev->dev)) + continue; + + dev_data = get_dev_data(&dev->dev); + dev_data->passthrough = true; + + attach_device(&dev->dev, pt_domain); + } + + amd_iommu_stats_init(); + + pr_info("AMD-Vi: Initialized for Passthrough Mode\n"); + + return 0; +} + +/* IOMMUv2 specific functions */ +int amd_iommu_register_ppr_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_register(&ppr_notifier, nb); +} +EXPORT_SYMBOL(amd_iommu_register_ppr_notifier); + +int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister(&ppr_notifier, nb); +} +EXPORT_SYMBOL(amd_iommu_unregister_ppr_notifier); + +void amd_iommu_domain_direct_map(struct iommu_domain *dom) +{ + struct protection_domain *domain = to_pdomain(dom); + unsigned long flags; + + spin_lock_irqsave(&domain->lock, flags); + + /* Update data structure */ + domain->mode = PAGE_MODE_NONE; + domain->updated = true; + + /* Make changes visible to IOMMUs */ + update_domain(domain); + + /* Page-table is not visible to IOMMU anymore, so free it */ + free_pagetable(domain); + + spin_unlock_irqrestore(&domain->lock, flags); +} +EXPORT_SYMBOL(amd_iommu_domain_direct_map); + +int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids) +{ + struct protection_domain *domain = to_pdomain(dom); + unsigned long flags; + int levels, ret; + + if (pasids <= 0 || pasids > (PASID_MASK + 1)) + return -EINVAL; + + /* Number of GCR3 table levels required */ + for (levels = 0; (pasids - 1) & ~0x1ff; pasids >>= 9) + levels += 1; + + if (levels > amd_iommu_max_glx_val) + return -EINVAL; + + spin_lock_irqsave(&domain->lock, flags); + + /* + * Save us all sanity checks whether devices already in the + * domain support IOMMUv2. Just force that the domain has no + * devices attached when it is switched into IOMMUv2 mode. + */ + ret = -EBUSY; + if (domain->dev_cnt > 0 || domain->flags & PD_IOMMUV2_MASK) + goto out; + + ret = -ENOMEM; + domain->gcr3_tbl = (void *)get_zeroed_page(GFP_ATOMIC); + if (domain->gcr3_tbl == NULL) + goto out; + + domain->glx = levels; + domain->flags |= PD_IOMMUV2_MASK; + domain->updated = true; + + update_domain(domain); + + ret = 0; + +out: + spin_unlock_irqrestore(&domain->lock, flags); + + return ret; +} +EXPORT_SYMBOL(amd_iommu_domain_enable_v2); + +static int __flush_pasid(struct protection_domain *domain, int pasid, + u64 address, bool size) +{ + struct iommu_dev_data *dev_data; + struct iommu_cmd cmd; + int i, ret; + + if (!(domain->flags & PD_IOMMUV2_MASK)) + return -EINVAL; + + build_inv_iommu_pasid(&cmd, domain->id, pasid, address, size); + + /* + * IOMMU TLB needs to be flushed before Device TLB to + * prevent device TLB refill from IOMMU TLB + */ + for (i = 0; i < amd_iommus_present; ++i) { + if (domain->dev_iommu[i] == 0) + continue; + + ret = iommu_queue_command(amd_iommus[i], &cmd); + if (ret != 0) + goto out; + } + + /* Wait until IOMMU TLB flushes are complete */ + domain_flush_complete(domain); + + /* Now flush device TLBs */ + list_for_each_entry(dev_data, &domain->dev_list, list) { + struct amd_iommu *iommu; + int qdep; + + BUG_ON(!dev_data->ats.enabled); + + qdep = dev_data->ats.qdep; + iommu = amd_iommu_rlookup_table[dev_data->devid]; + + build_inv_iotlb_pasid(&cmd, dev_data->devid, pasid, + qdep, address, size); + + ret = iommu_queue_command(iommu, &cmd); + if (ret != 0) + goto out; + } + + /* Wait until all device TLBs are flushed */ + domain_flush_complete(domain); + + ret = 0; + +out: + + return ret; +} + +static int __amd_iommu_flush_page(struct protection_domain *domain, int pasid, + u64 address) +{ + INC_STATS_COUNTER(invalidate_iotlb); + + return __flush_pasid(domain, pasid, address, false); +} + +int amd_iommu_flush_page(struct iommu_domain *dom, int pasid, + u64 address) +{ + struct protection_domain *domain = to_pdomain(dom); + unsigned long flags; + int ret; + + spin_lock_irqsave(&domain->lock, flags); + ret = __amd_iommu_flush_page(domain, pasid, address); + spin_unlock_irqrestore(&domain->lock, flags); + + return ret; +} +EXPORT_SYMBOL(amd_iommu_flush_page); + +static int __amd_iommu_flush_tlb(struct protection_domain *domain, int pasid) +{ + INC_STATS_COUNTER(invalidate_iotlb_all); + + return __flush_pasid(domain, pasid, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, + true); +} + +int amd_iommu_flush_tlb(struct iommu_domain *dom, int pasid) +{ + struct protection_domain *domain = to_pdomain(dom); + unsigned long flags; + int ret; + + spin_lock_irqsave(&domain->lock, flags); + ret = __amd_iommu_flush_tlb(domain, pasid); + spin_unlock_irqrestore(&domain->lock, flags); + + return ret; +} +EXPORT_SYMBOL(amd_iommu_flush_tlb); + +static u64 *__get_gcr3_pte(u64 *root, int level, int pasid, bool alloc) +{ + int index; + u64 *pte; + + while (true) { + + index = (pasid >> (9 * level)) & 0x1ff; + pte = &root[index]; + + if (level == 0) + break; + + if (!(*pte & GCR3_VALID)) { + if (!alloc) + return NULL; + + root = (void *)get_zeroed_page(GFP_ATOMIC); + if (root == NULL) + return NULL; + + *pte = __pa(root) | GCR3_VALID; + } + + root = __va(*pte & PAGE_MASK); + + level -= 1; + } + + return pte; +} + +static int __set_gcr3(struct protection_domain *domain, int pasid, + unsigned long cr3) +{ + u64 *pte; + + if (domain->mode != PAGE_MODE_NONE) + return -EINVAL; + + pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, true); + if (pte == NULL) + return -ENOMEM; + + *pte = (cr3 & PAGE_MASK) | GCR3_VALID; + + return __amd_iommu_flush_tlb(domain, pasid); +} + +static int __clear_gcr3(struct protection_domain *domain, int pasid) +{ + u64 *pte; + + if (domain->mode != PAGE_MODE_NONE) + return -EINVAL; + + pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, false); + if (pte == NULL) + return 0; + + *pte = 0; + + return __amd_iommu_flush_tlb(domain, pasid); +} + +int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid, + unsigned long cr3) +{ + struct protection_domain *domain = to_pdomain(dom); + unsigned long flags; + int ret; + + spin_lock_irqsave(&domain->lock, flags); + ret = __set_gcr3(domain, pasid, cr3); + spin_unlock_irqrestore(&domain->lock, flags); + + return ret; +} +EXPORT_SYMBOL(amd_iommu_domain_set_gcr3); + +int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, int pasid) +{ + struct protection_domain *domain = to_pdomain(dom); + unsigned long flags; + int ret; + + spin_lock_irqsave(&domain->lock, flags); + ret = __clear_gcr3(domain, pasid); + spin_unlock_irqrestore(&domain->lock, flags); + + return ret; +} +EXPORT_SYMBOL(amd_iommu_domain_clear_gcr3); + +int amd_iommu_complete_ppr(struct pci_dev *pdev, int pasid, + int status, int tag) +{ + struct iommu_dev_data *dev_data; + struct amd_iommu *iommu; + struct iommu_cmd cmd; + + INC_STATS_COUNTER(complete_ppr); + + dev_data = get_dev_data(&pdev->dev); + iommu = amd_iommu_rlookup_table[dev_data->devid]; + + build_complete_ppr(&cmd, dev_data->devid, pasid, status, + tag, dev_data->pri_tlp); + + return iommu_queue_command(iommu, &cmd); +} +EXPORT_SYMBOL(amd_iommu_complete_ppr); + +struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev) +{ + struct protection_domain *pdomain; + + pdomain = get_domain(&pdev->dev); + if (IS_ERR(pdomain)) + return NULL; + + /* Only return IOMMUv2 domains */ + if (!(pdomain->flags & PD_IOMMUV2_MASK)) + return NULL; + + return &pdomain->domain; +} +EXPORT_SYMBOL(amd_iommu_get_v2_domain); + +void amd_iommu_enable_device_erratum(struct pci_dev *pdev, u32 erratum) +{ + struct iommu_dev_data *dev_data; + + if (!amd_iommu_v2_supported()) + return; + + dev_data = get_dev_data(&pdev->dev); + dev_data->errata |= (1 << erratum); +} +EXPORT_SYMBOL(amd_iommu_enable_device_erratum); + +int amd_iommu_device_info(struct pci_dev *pdev, + struct amd_iommu_device_info *info) +{ + int max_pasids; + int pos; + + if (pdev == NULL || info == NULL) + return -EINVAL; + + if (!amd_iommu_v2_supported()) + return -EINVAL; + + memset(info, 0, sizeof(*info)); + + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS); + if (pos) + info->flags |= AMD_IOMMU_DEVICE_FLAG_ATS_SUP; + + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); + if (pos) + info->flags |= AMD_IOMMU_DEVICE_FLAG_PRI_SUP; + + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID); + if (pos) { + int features; + + max_pasids = 1 << (9 * (amd_iommu_max_glx_val + 1)); + max_pasids = min(max_pasids, (1 << 20)); + + info->flags |= AMD_IOMMU_DEVICE_FLAG_PASID_SUP; + info->max_pasids = min(pci_max_pasids(pdev), max_pasids); + + features = pci_pasid_features(pdev); + if (features & PCI_PASID_CAP_EXEC) + info->flags |= AMD_IOMMU_DEVICE_FLAG_EXEC_SUP; + if (features & PCI_PASID_CAP_PRIV) + info->flags |= AMD_IOMMU_DEVICE_FLAG_PRIV_SUP; + } + + return 0; +} +EXPORT_SYMBOL(amd_iommu_device_info); + +#ifdef CONFIG_IRQ_REMAP + +/***************************************************************************** + * + * Interrupt Remapping Implementation + * + *****************************************************************************/ + +union irte { + u32 val; + struct { + u32 valid : 1, + no_fault : 1, + int_type : 3, + rq_eoi : 1, + dm : 1, + rsvd_1 : 1, + destination : 8, + vector : 8, + rsvd_2 : 8; + } fields; +}; + +#define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6) +#define DTE_IRQ_REMAP_INTCTL (2ULL << 60) +#define DTE_IRQ_TABLE_LEN (8ULL << 1) +#define DTE_IRQ_REMAP_ENABLE 1ULL + +static void set_dte_irq_entry(u16 devid, struct irq_remap_table *table) +{ + u64 dte; + + dte = amd_iommu_dev_table[devid].data[2]; + dte &= ~DTE_IRQ_PHYS_ADDR_MASK; + dte |= virt_to_phys(table->table); + dte |= DTE_IRQ_REMAP_INTCTL; + dte |= DTE_IRQ_TABLE_LEN; + dte |= DTE_IRQ_REMAP_ENABLE; + + amd_iommu_dev_table[devid].data[2] = dte; +} + +#define IRTE_ALLOCATED (~1U) + +static struct irq_remap_table *get_irq_table(u16 devid, bool ioapic) +{ + struct irq_remap_table *table = NULL; + struct amd_iommu *iommu; + unsigned long flags; + u16 alias; + + write_lock_irqsave(&amd_iommu_devtable_lock, flags); + + iommu = amd_iommu_rlookup_table[devid]; + if (!iommu) + goto out_unlock; + + table = irq_lookup_table[devid]; + if (table) + goto out; + + alias = amd_iommu_alias_table[devid]; + table = irq_lookup_table[alias]; + if (table) { + irq_lookup_table[devid] = table; + set_dte_irq_entry(devid, table); + iommu_flush_dte(iommu, devid); + goto out; + } + + /* Nothing there yet, allocate new irq remapping table */ + table = kzalloc(sizeof(*table), GFP_ATOMIC); + if (!table) + goto out; + + /* Initialize table spin-lock */ + spin_lock_init(&table->lock); + + if (ioapic) + /* Keep the first 32 indexes free for IOAPIC interrupts */ + table->min_index = 32; + + table->table = kmem_cache_alloc(amd_iommu_irq_cache, GFP_ATOMIC); + if (!table->table) { + kfree(table); + table = NULL; + goto out; + } + + memset(table->table, 0, MAX_IRQS_PER_TABLE * sizeof(u32)); + + if (ioapic) { + int i; + + for (i = 0; i < 32; ++i) + table->table[i] = IRTE_ALLOCATED; + } + + irq_lookup_table[devid] = table; + set_dte_irq_entry(devid, table); + iommu_flush_dte(iommu, devid); + if (devid != alias) { + irq_lookup_table[alias] = table; + set_dte_irq_entry(alias, table); + iommu_flush_dte(iommu, alias); + } + +out: + iommu_completion_wait(iommu); + +out_unlock: + write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); + + return table; +} + +static int alloc_irq_index(struct irq_cfg *cfg, u16 devid, int count) +{ + struct irq_remap_table *table; + unsigned long flags; + int index, c; + + table = get_irq_table(devid, false); + if (!table) + return -ENODEV; + + spin_lock_irqsave(&table->lock, flags); + + /* Scan table for free entries */ + for (c = 0, index = table->min_index; + index < MAX_IRQS_PER_TABLE; + ++index) { + if (table->table[index] == 0) + c += 1; + else + c = 0; + + if (c == count) { + struct irq_2_irte *irte_info; + + for (; c != 0; --c) + table->table[index - c + 1] = IRTE_ALLOCATED; + + index -= count - 1; + + cfg->remapped = 1; + irte_info = &cfg->irq_2_irte; + irte_info->devid = devid; + irte_info->index = index; + + goto out; + } + } + + index = -ENOSPC; + +out: + spin_unlock_irqrestore(&table->lock, flags); + + return index; +} + +static int get_irte(u16 devid, int index, union irte *irte) +{ + struct irq_remap_table *table; + unsigned long flags; + + table = get_irq_table(devid, false); + if (!table) + return -ENOMEM; + + spin_lock_irqsave(&table->lock, flags); + irte->val = table->table[index]; + spin_unlock_irqrestore(&table->lock, flags); + + return 0; +} + +static int modify_irte(u16 devid, int index, union irte irte) +{ + struct irq_remap_table *table; + struct amd_iommu *iommu; + unsigned long flags; + + iommu = amd_iommu_rlookup_table[devid]; + if (iommu == NULL) + return -EINVAL; + + table = get_irq_table(devid, false); + if (!table) + return -ENOMEM; + + spin_lock_irqsave(&table->lock, flags); + table->table[index] = irte.val; + spin_unlock_irqrestore(&table->lock, flags); + + iommu_flush_irt(iommu, devid); + iommu_completion_wait(iommu); + + return 0; +} + +static void free_irte(u16 devid, int index) +{ + struct irq_remap_table *table; + struct amd_iommu *iommu; + unsigned long flags; + + iommu = amd_iommu_rlookup_table[devid]; + if (iommu == NULL) + return; + + table = get_irq_table(devid, false); + if (!table) + return; + + spin_lock_irqsave(&table->lock, flags); + table->table[index] = 0; + spin_unlock_irqrestore(&table->lock, flags); + + iommu_flush_irt(iommu, devid); + iommu_completion_wait(iommu); +} + +static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry, + unsigned int destination, int vector, + struct io_apic_irq_attr *attr) +{ + struct irq_remap_table *table; + struct irq_2_irte *irte_info; + struct irq_cfg *cfg; + union irte irte; + int ioapic_id; + int index; + int devid; + int ret; + + cfg = irq_cfg(irq); + if (!cfg) + return -EINVAL; + + irte_info = &cfg->irq_2_irte; + ioapic_id = mpc_ioapic_id(attr->ioapic); + devid = get_ioapic_devid(ioapic_id); + + if (devid < 0) + return devid; + + table = get_irq_table(devid, true); + if (table == NULL) + return -ENOMEM; + + index = attr->ioapic_pin; + + /* Setup IRQ remapping info */ + cfg->remapped = 1; + irte_info->devid = devid; + irte_info->index = index; + + /* Setup IRTE for IOMMU */ + irte.val = 0; + irte.fields.vector = vector; + irte.fields.int_type = apic->irq_delivery_mode; + irte.fields.destination = destination; + irte.fields.dm = apic->irq_dest_mode; + irte.fields.valid = 1; + + ret = modify_irte(devid, index, irte); + if (ret) + return ret; + + /* Setup IOAPIC entry */ + memset(entry, 0, sizeof(*entry)); + + entry->vector = index; + entry->mask = 0; + entry->trigger = attr->trigger; + entry->polarity = attr->polarity; + + /* + * Mask level triggered irqs. + */ + if (attr->trigger) + entry->mask = 1; + + return 0; +} + +static int set_affinity(struct irq_data *data, const struct cpumask *mask, + bool force) +{ + struct irq_2_irte *irte_info; + unsigned int dest, irq; + struct irq_cfg *cfg; + union irte irte; + int err; + + if (!config_enabled(CONFIG_SMP)) + return -1; + + cfg = irqd_cfg(data); + irq = data->irq; + irte_info = &cfg->irq_2_irte; + + if (!cpumask_intersects(mask, cpu_online_mask)) + return -EINVAL; + + if (get_irte(irte_info->devid, irte_info->index, &irte)) + return -EBUSY; + + if (assign_irq_vector(irq, cfg, mask)) + return -EBUSY; + + err = apic->cpu_mask_to_apicid_and(cfg->domain, mask, &dest); + if (err) { + if (assign_irq_vector(irq, cfg, data->affinity)) + pr_err("AMD-Vi: Failed to recover vector for irq %d\n", irq); + return err; + } + + irte.fields.vector = cfg->vector; + irte.fields.destination = dest; + + modify_irte(irte_info->devid, irte_info->index, irte); + + if (cfg->move_in_progress) + send_cleanup_vector(cfg); + + cpumask_copy(data->affinity, mask); + + return 0; +} + +static int free_irq(int irq) +{ + struct irq_2_irte *irte_info; + struct irq_cfg *cfg; + + cfg = irq_cfg(irq); + if (!cfg) + return -EINVAL; + + irte_info = &cfg->irq_2_irte; + + free_irte(irte_info->devid, irte_info->index); + + return 0; +} + +static void compose_msi_msg(struct pci_dev *pdev, + unsigned int irq, unsigned int dest, + struct msi_msg *msg, u8 hpet_id) +{ + struct irq_2_irte *irte_info; + struct irq_cfg *cfg; + union irte irte; + + cfg = irq_cfg(irq); + if (!cfg) + return; + + irte_info = &cfg->irq_2_irte; + + irte.val = 0; + irte.fields.vector = cfg->vector; + irte.fields.int_type = apic->irq_delivery_mode; + irte.fields.destination = dest; + irte.fields.dm = apic->irq_dest_mode; + irte.fields.valid = 1; + + modify_irte(irte_info->devid, irte_info->index, irte); + + msg->address_hi = MSI_ADDR_BASE_HI; + msg->address_lo = MSI_ADDR_BASE_LO; + msg->data = irte_info->index; +} + +static int msi_alloc_irq(struct pci_dev *pdev, int irq, int nvec) +{ + struct irq_cfg *cfg; + int index; + u16 devid; + + if (!pdev) + return -EINVAL; + + cfg = irq_cfg(irq); + if (!cfg) + return -EINVAL; + + devid = get_device_id(&pdev->dev); + index = alloc_irq_index(cfg, devid, nvec); + + return index < 0 ? MAX_IRQS_PER_TABLE : index; +} + +static int msi_setup_irq(struct pci_dev *pdev, unsigned int irq, + int index, int offset) +{ + struct irq_2_irte *irte_info; + struct irq_cfg *cfg; + u16 devid; + + if (!pdev) + return -EINVAL; + + cfg = irq_cfg(irq); + if (!cfg) + return -EINVAL; + + if (index >= MAX_IRQS_PER_TABLE) + return 0; + + devid = get_device_id(&pdev->dev); + irte_info = &cfg->irq_2_irte; + + cfg->remapped = 1; + irte_info->devid = devid; + irte_info->index = index + offset; + + return 0; +} + +static int alloc_hpet_msi(unsigned int irq, unsigned int id) +{ + struct irq_2_irte *irte_info; + struct irq_cfg *cfg; + int index, devid; + + cfg = irq_cfg(irq); + if (!cfg) + return -EINVAL; + + irte_info = &cfg->irq_2_irte; + devid = get_hpet_devid(id); + if (devid < 0) + return devid; + + index = alloc_irq_index(cfg, devid, 1); + if (index < 0) + return index; + + cfg->remapped = 1; + irte_info->devid = devid; + irte_info->index = index; + + return 0; +} + +struct irq_remap_ops amd_iommu_irq_ops = { + .prepare = amd_iommu_prepare, + .enable = amd_iommu_enable, + .disable = amd_iommu_disable, + .reenable = amd_iommu_reenable, + .enable_faulting = amd_iommu_enable_faulting, + .setup_ioapic_entry = setup_ioapic_entry, + .set_affinity = set_affinity, + .free_irq = free_irq, + .compose_msi_msg = compose_msi_msg, + .msi_alloc_irq = msi_alloc_irq, + .msi_setup_irq = msi_setup_irq, + .alloc_hpet_msi = alloc_hpet_msi, +}; +#endif diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c new file mode 100644 index 000000000..450ef5001 --- /dev/null +++ b/drivers/iommu/amd_iommu_init.c @@ -0,0 +1,2403 @@ +/* + * Copyright (C) 2007-2010 Advanced Micro Devices, Inc. + * Author: Joerg Roedel <jroedel@suse.de> + * Leo Duran <leo.duran@amd.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/pci.h> +#include <linux/acpi.h> +#include <linux/list.h> +#include <linux/slab.h> +#include <linux/syscore_ops.h> +#include <linux/interrupt.h> +#include <linux/msi.h> +#include <linux/amd-iommu.h> +#include <linux/export.h> +#include <linux/iommu.h> +#include <asm/pci-direct.h> +#include <asm/iommu.h> +#include <asm/gart.h> +#include <asm/x86_init.h> +#include <asm/iommu_table.h> +#include <asm/io_apic.h> +#include <asm/irq_remapping.h> + +#include "amd_iommu_proto.h" +#include "amd_iommu_types.h" +#include "irq_remapping.h" + +/* + * definitions for the ACPI scanning code + */ +#define IVRS_HEADER_LENGTH 48 + +#define ACPI_IVHD_TYPE 0x10 +#define ACPI_IVMD_TYPE_ALL 0x20 +#define ACPI_IVMD_TYPE 0x21 +#define ACPI_IVMD_TYPE_RANGE 0x22 + +#define IVHD_DEV_ALL 0x01 +#define IVHD_DEV_SELECT 0x02 +#define IVHD_DEV_SELECT_RANGE_START 0x03 +#define IVHD_DEV_RANGE_END 0x04 +#define IVHD_DEV_ALIAS 0x42 +#define IVHD_DEV_ALIAS_RANGE 0x43 +#define IVHD_DEV_EXT_SELECT 0x46 +#define IVHD_DEV_EXT_SELECT_RANGE 0x47 +#define IVHD_DEV_SPECIAL 0x48 + +#define IVHD_SPECIAL_IOAPIC 1 +#define IVHD_SPECIAL_HPET 2 + +#define IVHD_FLAG_HT_TUN_EN_MASK 0x01 +#define IVHD_FLAG_PASSPW_EN_MASK 0x02 +#define IVHD_FLAG_RESPASSPW_EN_MASK 0x04 +#define IVHD_FLAG_ISOC_EN_MASK 0x08 + +#define IVMD_FLAG_EXCL_RANGE 0x08 +#define IVMD_FLAG_UNITY_MAP 0x01 + +#define ACPI_DEVFLAG_INITPASS 0x01 +#define ACPI_DEVFLAG_EXTINT 0x02 +#define ACPI_DEVFLAG_NMI 0x04 +#define ACPI_DEVFLAG_SYSMGT1 0x10 +#define ACPI_DEVFLAG_SYSMGT2 0x20 +#define ACPI_DEVFLAG_LINT0 0x40 +#define ACPI_DEVFLAG_LINT1 0x80 +#define ACPI_DEVFLAG_ATSDIS 0x10000000 + +/* + * ACPI table definitions + * + * These data structures are laid over the table to parse the important values + * out of it. + */ + +/* + * structure describing one IOMMU in the ACPI table. Typically followed by one + * or more ivhd_entrys. + */ +struct ivhd_header { + u8 type; + u8 flags; + u16 length; + u16 devid; + u16 cap_ptr; + u64 mmio_phys; + u16 pci_seg; + u16 info; + u32 efr; +} __attribute__((packed)); + +/* + * A device entry describing which devices a specific IOMMU translates and + * which requestor ids they use. + */ +struct ivhd_entry { + u8 type; + u16 devid; + u8 flags; + u32 ext; +} __attribute__((packed)); + +/* + * An AMD IOMMU memory definition structure. It defines things like exclusion + * ranges for devices and regions that should be unity mapped. + */ +struct ivmd_header { + u8 type; + u8 flags; + u16 length; + u16 devid; + u16 aux; + u64 resv; + u64 range_start; + u64 range_length; +} __attribute__((packed)); + +bool amd_iommu_dump; +bool amd_iommu_irq_remap __read_mostly; + +static bool amd_iommu_detected; +static bool __initdata amd_iommu_disabled; + +u16 amd_iommu_last_bdf; /* largest PCI device id we have + to handle */ +LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings + we find in ACPI */ +u32 amd_iommu_unmap_flush; /* if true, flush on every unmap */ + +LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the + system */ + +/* Array to assign indices to IOMMUs*/ +struct amd_iommu *amd_iommus[MAX_IOMMUS]; +int amd_iommus_present; + +/* IOMMUs have a non-present cache? */ +bool amd_iommu_np_cache __read_mostly; +bool amd_iommu_iotlb_sup __read_mostly = true; + +u32 amd_iommu_max_pasid __read_mostly = ~0; + +bool amd_iommu_v2_present __read_mostly; +bool amd_iommu_pc_present __read_mostly; + +bool amd_iommu_force_isolation __read_mostly; + +/* + * List of protection domains - used during resume + */ +LIST_HEAD(amd_iommu_pd_list); +spinlock_t amd_iommu_pd_lock; + +/* + * Pointer to the device table which is shared by all AMD IOMMUs + * it is indexed by the PCI device id or the HT unit id and contains + * information about the domain the device belongs to as well as the + * page table root pointer. + */ +struct dev_table_entry *amd_iommu_dev_table; + +/* + * The alias table is a driver specific data structure which contains the + * mappings of the PCI device ids to the actual requestor ids on the IOMMU. + * More than one device can share the same requestor id. + */ +u16 *amd_iommu_alias_table; + +/* + * The rlookup table is used to find the IOMMU which is responsible + * for a specific device. It is also indexed by the PCI device id. + */ +struct amd_iommu **amd_iommu_rlookup_table; + +/* + * This table is used to find the irq remapping table for a given device id + * quickly. + */ +struct irq_remap_table **irq_lookup_table; + +/* + * AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap + * to know which ones are already in use. + */ +unsigned long *amd_iommu_pd_alloc_bitmap; + +static u32 dev_table_size; /* size of the device table */ +static u32 alias_table_size; /* size of the alias table */ +static u32 rlookup_table_size; /* size if the rlookup table */ + +enum iommu_init_state { + IOMMU_START_STATE, + IOMMU_IVRS_DETECTED, + IOMMU_ACPI_FINISHED, + IOMMU_ENABLED, + IOMMU_PCI_INIT, + IOMMU_INTERRUPTS_EN, + IOMMU_DMA_OPS, + IOMMU_INITIALIZED, + IOMMU_NOT_FOUND, + IOMMU_INIT_ERROR, +}; + +/* Early ioapic and hpet maps from kernel command line */ +#define EARLY_MAP_SIZE 4 +static struct devid_map __initdata early_ioapic_map[EARLY_MAP_SIZE]; +static struct devid_map __initdata early_hpet_map[EARLY_MAP_SIZE]; +static int __initdata early_ioapic_map_size; +static int __initdata early_hpet_map_size; +static bool __initdata cmdline_maps; + +static enum iommu_init_state init_state = IOMMU_START_STATE; + +static int amd_iommu_enable_interrupts(void); +static int __init iommu_go_to_state(enum iommu_init_state state); + +static inline void update_last_devid(u16 devid) +{ + if (devid > amd_iommu_last_bdf) + amd_iommu_last_bdf = devid; +} + +static inline unsigned long tbl_size(int entry_size) +{ + unsigned shift = PAGE_SHIFT + + get_order(((int)amd_iommu_last_bdf + 1) * entry_size); + + return 1UL << shift; +} + +/* Access to l1 and l2 indexed register spaces */ + +static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address) +{ + u32 val; + + pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16)); + pci_read_config_dword(iommu->dev, 0xfc, &val); + return val; +} + +static void iommu_write_l1(struct amd_iommu *iommu, u16 l1, u8 address, u32 val) +{ + pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16 | 1 << 31)); + pci_write_config_dword(iommu->dev, 0xfc, val); + pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16)); +} + +static u32 iommu_read_l2(struct amd_iommu *iommu, u8 address) +{ + u32 val; + + pci_write_config_dword(iommu->dev, 0xf0, address); + pci_read_config_dword(iommu->dev, 0xf4, &val); + return val; +} + +static void iommu_write_l2(struct amd_iommu *iommu, u8 address, u32 val) +{ + pci_write_config_dword(iommu->dev, 0xf0, (address | 1 << 8)); + pci_write_config_dword(iommu->dev, 0xf4, val); +} + +/**************************************************************************** + * + * AMD IOMMU MMIO register space handling functions + * + * These functions are used to program the IOMMU device registers in + * MMIO space required for that driver. + * + ****************************************************************************/ + +/* + * This function set the exclusion range in the IOMMU. DMA accesses to the + * exclusion range are passed through untranslated + */ +static void iommu_set_exclusion_range(struct amd_iommu *iommu) +{ + u64 start = iommu->exclusion_start & PAGE_MASK; + u64 limit = (start + iommu->exclusion_length) & PAGE_MASK; + u64 entry; + + if (!iommu->exclusion_start) + return; + + entry = start | MMIO_EXCL_ENABLE_MASK; + memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET, + &entry, sizeof(entry)); + + entry = limit; + memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET, + &entry, sizeof(entry)); +} + +/* Programs the physical address of the device table into the IOMMU hardware */ +static void iommu_set_device_table(struct amd_iommu *iommu) +{ + u64 entry; + + BUG_ON(iommu->mmio_base == NULL); + + entry = virt_to_phys(amd_iommu_dev_table); + entry |= (dev_table_size >> 12) - 1; + memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET, + &entry, sizeof(entry)); +} + +/* Generic functions to enable/disable certain features of the IOMMU. */ +static void iommu_feature_enable(struct amd_iommu *iommu, u8 bit) +{ + u32 ctrl; + + ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET); + ctrl |= (1 << bit); + writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); +} + +static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit) +{ + u32 ctrl; + + ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET); + ctrl &= ~(1 << bit); + writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); +} + +static void iommu_set_inv_tlb_timeout(struct amd_iommu *iommu, int timeout) +{ + u32 ctrl; + + ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET); + ctrl &= ~CTRL_INV_TO_MASK; + ctrl |= (timeout << CONTROL_INV_TIMEOUT) & CTRL_INV_TO_MASK; + writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); +} + +/* Function to enable the hardware */ +static void iommu_enable(struct amd_iommu *iommu) +{ + iommu_feature_enable(iommu, CONTROL_IOMMU_EN); +} + +static void iommu_disable(struct amd_iommu *iommu) +{ + /* Disable command buffer */ + iommu_feature_disable(iommu, CONTROL_CMDBUF_EN); + + /* Disable event logging and event interrupts */ + iommu_feature_disable(iommu, CONTROL_EVT_INT_EN); + iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN); + + /* Disable IOMMU hardware itself */ + iommu_feature_disable(iommu, CONTROL_IOMMU_EN); +} + +/* + * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in + * the system has one. + */ +static u8 __iomem * __init iommu_map_mmio_space(u64 address, u64 end) +{ + if (!request_mem_region(address, end, "amd_iommu")) { + pr_err("AMD-Vi: Can not reserve memory region %llx-%llx for mmio\n", + address, end); + pr_err("AMD-Vi: This is a BIOS bug. Please contact your hardware vendor\n"); + return NULL; + } + + return (u8 __iomem *)ioremap_nocache(address, end); +} + +static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu) +{ + if (iommu->mmio_base) + iounmap(iommu->mmio_base); + release_mem_region(iommu->mmio_phys, iommu->mmio_phys_end); +} + +/**************************************************************************** + * + * The functions below belong to the first pass of AMD IOMMU ACPI table + * parsing. In this pass we try to find out the highest device id this + * code has to handle. Upon this information the size of the shared data + * structures is determined later. + * + ****************************************************************************/ + +/* + * This function calculates the length of a given IVHD entry + */ +static inline int ivhd_entry_length(u8 *ivhd) +{ + return 0x04 << (*ivhd >> 6); +} + +/* + * This function reads the last device id the IOMMU has to handle from the PCI + * capability header for this IOMMU + */ +static int __init find_last_devid_on_pci(int bus, int dev, int fn, int cap_ptr) +{ + u32 cap; + + cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET); + update_last_devid(PCI_DEVID(MMIO_GET_BUS(cap), MMIO_GET_LD(cap))); + + return 0; +} + +/* + * After reading the highest device id from the IOMMU PCI capability header + * this function looks if there is a higher device id defined in the ACPI table + */ +static int __init find_last_devid_from_ivhd(struct ivhd_header *h) +{ + u8 *p = (void *)h, *end = (void *)h; + struct ivhd_entry *dev; + + p += sizeof(*h); + end += h->length; + + find_last_devid_on_pci(PCI_BUS_NUM(h->devid), + PCI_SLOT(h->devid), + PCI_FUNC(h->devid), + h->cap_ptr); + + while (p < end) { + dev = (struct ivhd_entry *)p; + switch (dev->type) { + case IVHD_DEV_SELECT: + case IVHD_DEV_RANGE_END: + case IVHD_DEV_ALIAS: + case IVHD_DEV_EXT_SELECT: + /* all the above subfield types refer to device ids */ + update_last_devid(dev->devid); + break; + default: + break; + } + p += ivhd_entry_length(p); + } + + WARN_ON(p != end); + + return 0; +} + +/* + * Iterate over all IVHD entries in the ACPI table and find the highest device + * id which we need to handle. This is the first of three functions which parse + * the ACPI table. So we check the checksum here. + */ +static int __init find_last_devid_acpi(struct acpi_table_header *table) +{ + int i; + u8 checksum = 0, *p = (u8 *)table, *end = (u8 *)table; + struct ivhd_header *h; + + /* + * Validate checksum here so we don't need to do it when + * we actually parse the table + */ + for (i = 0; i < table->length; ++i) + checksum += p[i]; + if (checksum != 0) + /* ACPI table corrupt */ + return -ENODEV; + + p += IVRS_HEADER_LENGTH; + + end += table->length; + while (p < end) { + h = (struct ivhd_header *)p; + switch (h->type) { + case ACPI_IVHD_TYPE: + find_last_devid_from_ivhd(h); + break; + default: + break; + } + p += h->length; + } + WARN_ON(p != end); + + return 0; +} + +/**************************************************************************** + * + * The following functions belong to the code path which parses the ACPI table + * the second time. In this ACPI parsing iteration we allocate IOMMU specific + * data structures, initialize the device/alias/rlookup table and also + * basically initialize the hardware. + * + ****************************************************************************/ + +/* + * Allocates the command buffer. This buffer is per AMD IOMMU. We can + * write commands to that buffer later and the IOMMU will execute them + * asynchronously + */ +static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) +{ + u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(CMD_BUFFER_SIZE)); + + if (cmd_buf == NULL) + return NULL; + + iommu->cmd_buf_size = CMD_BUFFER_SIZE | CMD_BUFFER_UNINITIALIZED; + + return cmd_buf; +} + +/* + * This function resets the command buffer if the IOMMU stopped fetching + * commands from it. + */ +void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu) +{ + iommu_feature_disable(iommu, CONTROL_CMDBUF_EN); + + writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); + writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); + + iommu_feature_enable(iommu, CONTROL_CMDBUF_EN); +} + +/* + * This function writes the command buffer address to the hardware and + * enables it. + */ +static void iommu_enable_command_buffer(struct amd_iommu *iommu) +{ + u64 entry; + + BUG_ON(iommu->cmd_buf == NULL); + + entry = (u64)virt_to_phys(iommu->cmd_buf); + entry |= MMIO_CMD_SIZE_512; + + memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, + &entry, sizeof(entry)); + + amd_iommu_reset_cmd_buffer(iommu); + iommu->cmd_buf_size &= ~(CMD_BUFFER_UNINITIALIZED); +} + +static void __init free_command_buffer(struct amd_iommu *iommu) +{ + free_pages((unsigned long)iommu->cmd_buf, + get_order(iommu->cmd_buf_size & ~(CMD_BUFFER_UNINITIALIZED))); +} + +/* allocates the memory where the IOMMU will log its events to */ +static u8 * __init alloc_event_buffer(struct amd_iommu *iommu) +{ + iommu->evt_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(EVT_BUFFER_SIZE)); + + if (iommu->evt_buf == NULL) + return NULL; + + iommu->evt_buf_size = EVT_BUFFER_SIZE; + + return iommu->evt_buf; +} + +static void iommu_enable_event_buffer(struct amd_iommu *iommu) +{ + u64 entry; + + BUG_ON(iommu->evt_buf == NULL); + + entry = (u64)virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK; + + memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET, + &entry, sizeof(entry)); + + /* set head and tail to zero manually */ + writel(0x00, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); + writel(0x00, iommu->mmio_base + MMIO_EVT_TAIL_OFFSET); + + iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); +} + +static void __init free_event_buffer(struct amd_iommu *iommu) +{ + free_pages((unsigned long)iommu->evt_buf, get_order(EVT_BUFFER_SIZE)); +} + +/* allocates the memory where the IOMMU will log its events to */ +static u8 * __init alloc_ppr_log(struct amd_iommu *iommu) +{ + iommu->ppr_log = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(PPR_LOG_SIZE)); + + if (iommu->ppr_log == NULL) + return NULL; + + return iommu->ppr_log; +} + +static void iommu_enable_ppr_log(struct amd_iommu *iommu) +{ + u64 entry; + + if (iommu->ppr_log == NULL) + return; + + entry = (u64)virt_to_phys(iommu->ppr_log) | PPR_LOG_SIZE_512; + + memcpy_toio(iommu->mmio_base + MMIO_PPR_LOG_OFFSET, + &entry, sizeof(entry)); + + /* set head and tail to zero manually */ + writel(0x00, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); + writel(0x00, iommu->mmio_base + MMIO_PPR_TAIL_OFFSET); + + iommu_feature_enable(iommu, CONTROL_PPFLOG_EN); + iommu_feature_enable(iommu, CONTROL_PPR_EN); +} + +static void __init free_ppr_log(struct amd_iommu *iommu) +{ + if (iommu->ppr_log == NULL) + return; + + free_pages((unsigned long)iommu->ppr_log, get_order(PPR_LOG_SIZE)); +} + +static void iommu_enable_gt(struct amd_iommu *iommu) +{ + if (!iommu_feature(iommu, FEATURE_GT)) + return; + + iommu_feature_enable(iommu, CONTROL_GT_EN); +} + +/* sets a specific bit in the device table entry. */ +static void set_dev_entry_bit(u16 devid, u8 bit) +{ + int i = (bit >> 6) & 0x03; + int _bit = bit & 0x3f; + + amd_iommu_dev_table[devid].data[i] |= (1UL << _bit); +} + +static int get_dev_entry_bit(u16 devid, u8 bit) +{ + int i = (bit >> 6) & 0x03; + int _bit = bit & 0x3f; + + return (amd_iommu_dev_table[devid].data[i] & (1UL << _bit)) >> _bit; +} + + +void amd_iommu_apply_erratum_63(u16 devid) +{ + int sysmgt; + + sysmgt = get_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1) | + (get_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2) << 1); + + if (sysmgt == 0x01) + set_dev_entry_bit(devid, DEV_ENTRY_IW); +} + +/* Writes the specific IOMMU for a device into the rlookup table */ +static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid) +{ + amd_iommu_rlookup_table[devid] = iommu; +} + +/* + * This function takes the device specific flags read from the ACPI + * table and sets up the device table entry with that information + */ +static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu, + u16 devid, u32 flags, u32 ext_flags) +{ + if (flags & ACPI_DEVFLAG_INITPASS) + set_dev_entry_bit(devid, DEV_ENTRY_INIT_PASS); + if (flags & ACPI_DEVFLAG_EXTINT) + set_dev_entry_bit(devid, DEV_ENTRY_EINT_PASS); + if (flags & ACPI_DEVFLAG_NMI) + set_dev_entry_bit(devid, DEV_ENTRY_NMI_PASS); + if (flags & ACPI_DEVFLAG_SYSMGT1) + set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1); + if (flags & ACPI_DEVFLAG_SYSMGT2) + set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2); + if (flags & ACPI_DEVFLAG_LINT0) + set_dev_entry_bit(devid, DEV_ENTRY_LINT0_PASS); + if (flags & ACPI_DEVFLAG_LINT1) + set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS); + + amd_iommu_apply_erratum_63(devid); + + set_iommu_for_device(iommu, devid); +} + +static int __init add_special_device(u8 type, u8 id, u16 *devid, bool cmd_line) +{ + struct devid_map *entry; + struct list_head *list; + + if (type == IVHD_SPECIAL_IOAPIC) + list = &ioapic_map; + else if (type == IVHD_SPECIAL_HPET) + list = &hpet_map; + else + return -EINVAL; + + list_for_each_entry(entry, list, list) { + if (!(entry->id == id && entry->cmd_line)) + continue; + + pr_info("AMD-Vi: Command-line override present for %s id %d - ignoring\n", + type == IVHD_SPECIAL_IOAPIC ? "IOAPIC" : "HPET", id); + + *devid = entry->devid; + + return 0; + } + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return -ENOMEM; + + entry->id = id; + entry->devid = *devid; + entry->cmd_line = cmd_line; + + list_add_tail(&entry->list, list); + + return 0; +} + +static int __init add_early_maps(void) +{ + int i, ret; + + for (i = 0; i < early_ioapic_map_size; ++i) { + ret = add_special_device(IVHD_SPECIAL_IOAPIC, + early_ioapic_map[i].id, + &early_ioapic_map[i].devid, + early_ioapic_map[i].cmd_line); + if (ret) + return ret; + } + + for (i = 0; i < early_hpet_map_size; ++i) { + ret = add_special_device(IVHD_SPECIAL_HPET, + early_hpet_map[i].id, + &early_hpet_map[i].devid, + early_hpet_map[i].cmd_line); + if (ret) + return ret; + } + + return 0; +} + +/* + * Reads the device exclusion range from ACPI and initializes the IOMMU with + * it + */ +static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m) +{ + struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; + + if (!(m->flags & IVMD_FLAG_EXCL_RANGE)) + return; + + if (iommu) { + /* + * We only can configure exclusion ranges per IOMMU, not + * per device. But we can enable the exclusion range per + * device. This is done here + */ + set_dev_entry_bit(devid, DEV_ENTRY_EX); + iommu->exclusion_start = m->range_start; + iommu->exclusion_length = m->range_length; + } +} + +/* + * Takes a pointer to an AMD IOMMU entry in the ACPI table and + * initializes the hardware and our data structures with it. + */ +static int __init init_iommu_from_acpi(struct amd_iommu *iommu, + struct ivhd_header *h) +{ + u8 *p = (u8 *)h; + u8 *end = p, flags = 0; + u16 devid = 0, devid_start = 0, devid_to = 0; + u32 dev_i, ext_flags = 0; + bool alias = false; + struct ivhd_entry *e; + int ret; + + + ret = add_early_maps(); + if (ret) + return ret; + + /* + * First save the recommended feature enable bits from ACPI + */ + iommu->acpi_flags = h->flags; + + /* + * Done. Now parse the device entries + */ + p += sizeof(struct ivhd_header); + end += h->length; + + + while (p < end) { + e = (struct ivhd_entry *)p; + switch (e->type) { + case IVHD_DEV_ALL: + + DUMP_printk(" DEV_ALL\t\t\t first devid: %02x:%02x.%x" + " last device %02x:%02x.%x flags: %02x\n", + PCI_BUS_NUM(iommu->first_device), + PCI_SLOT(iommu->first_device), + PCI_FUNC(iommu->first_device), + PCI_BUS_NUM(iommu->last_device), + PCI_SLOT(iommu->last_device), + PCI_FUNC(iommu->last_device), + e->flags); + + for (dev_i = iommu->first_device; + dev_i <= iommu->last_device; ++dev_i) + set_dev_entry_from_acpi(iommu, dev_i, + e->flags, 0); + break; + case IVHD_DEV_SELECT: + + DUMP_printk(" DEV_SELECT\t\t\t devid: %02x:%02x.%x " + "flags: %02x\n", + PCI_BUS_NUM(e->devid), + PCI_SLOT(e->devid), + PCI_FUNC(e->devid), + e->flags); + + devid = e->devid; + set_dev_entry_from_acpi(iommu, devid, e->flags, 0); + break; + case IVHD_DEV_SELECT_RANGE_START: + + DUMP_printk(" DEV_SELECT_RANGE_START\t " + "devid: %02x:%02x.%x flags: %02x\n", + PCI_BUS_NUM(e->devid), + PCI_SLOT(e->devid), + PCI_FUNC(e->devid), + e->flags); + + devid_start = e->devid; + flags = e->flags; + ext_flags = 0; + alias = false; + break; + case IVHD_DEV_ALIAS: + + DUMP_printk(" DEV_ALIAS\t\t\t devid: %02x:%02x.%x " + "flags: %02x devid_to: %02x:%02x.%x\n", + PCI_BUS_NUM(e->devid), + PCI_SLOT(e->devid), + PCI_FUNC(e->devid), + e->flags, + PCI_BUS_NUM(e->ext >> 8), + PCI_SLOT(e->ext >> 8), + PCI_FUNC(e->ext >> 8)); + + devid = e->devid; + devid_to = e->ext >> 8; + set_dev_entry_from_acpi(iommu, devid , e->flags, 0); + set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0); + amd_iommu_alias_table[devid] = devid_to; + break; + case IVHD_DEV_ALIAS_RANGE: + + DUMP_printk(" DEV_ALIAS_RANGE\t\t " + "devid: %02x:%02x.%x flags: %02x " + "devid_to: %02x:%02x.%x\n", + PCI_BUS_NUM(e->devid), + PCI_SLOT(e->devid), + PCI_FUNC(e->devid), + e->flags, + PCI_BUS_NUM(e->ext >> 8), + PCI_SLOT(e->ext >> 8), + PCI_FUNC(e->ext >> 8)); + + devid_start = e->devid; + flags = e->flags; + devid_to = e->ext >> 8; + ext_flags = 0; + alias = true; + break; + case IVHD_DEV_EXT_SELECT: + + DUMP_printk(" DEV_EXT_SELECT\t\t devid: %02x:%02x.%x " + "flags: %02x ext: %08x\n", + PCI_BUS_NUM(e->devid), + PCI_SLOT(e->devid), + PCI_FUNC(e->devid), + e->flags, e->ext); + + devid = e->devid; + set_dev_entry_from_acpi(iommu, devid, e->flags, + e->ext); + break; + case IVHD_DEV_EXT_SELECT_RANGE: + + DUMP_printk(" DEV_EXT_SELECT_RANGE\t devid: " + "%02x:%02x.%x flags: %02x ext: %08x\n", + PCI_BUS_NUM(e->devid), + PCI_SLOT(e->devid), + PCI_FUNC(e->devid), + e->flags, e->ext); + + devid_start = e->devid; + flags = e->flags; + ext_flags = e->ext; + alias = false; + break; + case IVHD_DEV_RANGE_END: + + DUMP_printk(" DEV_RANGE_END\t\t devid: %02x:%02x.%x\n", + PCI_BUS_NUM(e->devid), + PCI_SLOT(e->devid), + PCI_FUNC(e->devid)); + + devid = e->devid; + for (dev_i = devid_start; dev_i <= devid; ++dev_i) { + if (alias) { + amd_iommu_alias_table[dev_i] = devid_to; + set_dev_entry_from_acpi(iommu, + devid_to, flags, ext_flags); + } + set_dev_entry_from_acpi(iommu, dev_i, + flags, ext_flags); + } + break; + case IVHD_DEV_SPECIAL: { + u8 handle, type; + const char *var; + u16 devid; + int ret; + + handle = e->ext & 0xff; + devid = (e->ext >> 8) & 0xffff; + type = (e->ext >> 24) & 0xff; + + if (type == IVHD_SPECIAL_IOAPIC) + var = "IOAPIC"; + else if (type == IVHD_SPECIAL_HPET) + var = "HPET"; + else + var = "UNKNOWN"; + + DUMP_printk(" DEV_SPECIAL(%s[%d])\t\tdevid: %02x:%02x.%x\n", + var, (int)handle, + PCI_BUS_NUM(devid), + PCI_SLOT(devid), + PCI_FUNC(devid)); + + ret = add_special_device(type, handle, &devid, false); + if (ret) + return ret; + + /* + * add_special_device might update the devid in case a + * command-line override is present. So call + * set_dev_entry_from_acpi after add_special_device. + */ + set_dev_entry_from_acpi(iommu, devid, e->flags, 0); + + break; + } + default: + break; + } + + p += ivhd_entry_length(p); + } + + return 0; +} + +/* Initializes the device->iommu mapping for the driver */ +static int __init init_iommu_devices(struct amd_iommu *iommu) +{ + u32 i; + + for (i = iommu->first_device; i <= iommu->last_device; ++i) + set_iommu_for_device(iommu, i); + + return 0; +} + +static void __init free_iommu_one(struct amd_iommu *iommu) +{ + free_command_buffer(iommu); + free_event_buffer(iommu); + free_ppr_log(iommu); + iommu_unmap_mmio_space(iommu); +} + +static void __init free_iommu_all(void) +{ + struct amd_iommu *iommu, *next; + + for_each_iommu_safe(iommu, next) { + list_del(&iommu->list); + free_iommu_one(iommu); + kfree(iommu); + } +} + +/* + * Family15h Model 10h-1fh erratum 746 (IOMMU Logging May Stall Translations) + * Workaround: + * BIOS should disable L2B micellaneous clock gating by setting + * L2_L2B_CK_GATE_CONTROL[CKGateL2BMiscDisable](D0F2xF4_x90[2]) = 1b + */ +static void amd_iommu_erratum_746_workaround(struct amd_iommu *iommu) +{ + u32 value; + + if ((boot_cpu_data.x86 != 0x15) || + (boot_cpu_data.x86_model < 0x10) || + (boot_cpu_data.x86_model > 0x1f)) + return; + + pci_write_config_dword(iommu->dev, 0xf0, 0x90); + pci_read_config_dword(iommu->dev, 0xf4, &value); + + if (value & BIT(2)) + return; + + /* Select NB indirect register 0x90 and enable writing */ + pci_write_config_dword(iommu->dev, 0xf0, 0x90 | (1 << 8)); + + pci_write_config_dword(iommu->dev, 0xf4, value | 0x4); + pr_info("AMD-Vi: Applying erratum 746 workaround for IOMMU at %s\n", + dev_name(&iommu->dev->dev)); + + /* Clear the enable writing bit */ + pci_write_config_dword(iommu->dev, 0xf0, 0x90); +} + +/* + * This function clues the initialization function for one IOMMU + * together and also allocates the command buffer and programs the + * hardware. It does NOT enable the IOMMU. This is done afterwards. + */ +static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) +{ + int ret; + + spin_lock_init(&iommu->lock); + + /* Add IOMMU to internal data structures */ + list_add_tail(&iommu->list, &amd_iommu_list); + iommu->index = amd_iommus_present++; + + if (unlikely(iommu->index >= MAX_IOMMUS)) { + WARN(1, "AMD-Vi: System has more IOMMUs than supported by this driver\n"); + return -ENOSYS; + } + + /* Index is fine - add IOMMU to the array */ + amd_iommus[iommu->index] = iommu; + + /* + * Copy data from ACPI table entry to the iommu struct + */ + iommu->devid = h->devid; + iommu->cap_ptr = h->cap_ptr; + iommu->pci_seg = h->pci_seg; + iommu->mmio_phys = h->mmio_phys; + + /* Check if IVHD EFR contains proper max banks/counters */ + if ((h->efr != 0) && + ((h->efr & (0xF << 13)) != 0) && + ((h->efr & (0x3F << 17)) != 0)) { + iommu->mmio_phys_end = MMIO_REG_END_OFFSET; + } else { + iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET; + } + + iommu->mmio_base = iommu_map_mmio_space(iommu->mmio_phys, + iommu->mmio_phys_end); + if (!iommu->mmio_base) + return -ENOMEM; + + iommu->cmd_buf = alloc_command_buffer(iommu); + if (!iommu->cmd_buf) + return -ENOMEM; + + iommu->evt_buf = alloc_event_buffer(iommu); + if (!iommu->evt_buf) + return -ENOMEM; + + iommu->int_enabled = false; + + ret = init_iommu_from_acpi(iommu, h); + if (ret) + return ret; + + /* + * Make sure IOMMU is not considered to translate itself. The IVRS + * table tells us so, but this is a lie! + */ + amd_iommu_rlookup_table[iommu->devid] = NULL; + + init_iommu_devices(iommu); + + return 0; +} + +/* + * Iterates over all IOMMU entries in the ACPI table, allocates the + * IOMMU structure and initializes it with init_iommu_one() + */ +static int __init init_iommu_all(struct acpi_table_header *table) +{ + u8 *p = (u8 *)table, *end = (u8 *)table; + struct ivhd_header *h; + struct amd_iommu *iommu; + int ret; + + end += table->length; + p += IVRS_HEADER_LENGTH; + + while (p < end) { + h = (struct ivhd_header *)p; + switch (*p) { + case ACPI_IVHD_TYPE: + + DUMP_printk("device: %02x:%02x.%01x cap: %04x " + "seg: %d flags: %01x info %04x\n", + PCI_BUS_NUM(h->devid), PCI_SLOT(h->devid), + PCI_FUNC(h->devid), h->cap_ptr, + h->pci_seg, h->flags, h->info); + DUMP_printk(" mmio-addr: %016llx\n", + h->mmio_phys); + + iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL); + if (iommu == NULL) + return -ENOMEM; + + ret = init_iommu_one(iommu, h); + if (ret) + return ret; + break; + default: + break; + } + p += h->length; + + } + WARN_ON(p != end); + + return 0; +} + + +static void init_iommu_perf_ctr(struct amd_iommu *iommu) +{ + u64 val = 0xabcd, val2 = 0; + + if (!iommu_feature(iommu, FEATURE_PC)) + return; + + amd_iommu_pc_present = true; + + /* Check if the performance counters can be written to */ + if ((0 != amd_iommu_pc_get_set_reg_val(0, 0, 0, 0, &val, true)) || + (0 != amd_iommu_pc_get_set_reg_val(0, 0, 0, 0, &val2, false)) || + (val != val2)) { + pr_err("AMD-Vi: Unable to write to IOMMU perf counter.\n"); + amd_iommu_pc_present = false; + return; + } + + pr_info("AMD-Vi: IOMMU performance counters supported\n"); + + val = readl(iommu->mmio_base + MMIO_CNTR_CONF_OFFSET); + iommu->max_banks = (u8) ((val >> 12) & 0x3f); + iommu->max_counters = (u8) ((val >> 7) & 0xf); +} + +static ssize_t amd_iommu_show_cap(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct amd_iommu *iommu = dev_get_drvdata(dev); + return sprintf(buf, "%x\n", iommu->cap); +} +static DEVICE_ATTR(cap, S_IRUGO, amd_iommu_show_cap, NULL); + +static ssize_t amd_iommu_show_features(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct amd_iommu *iommu = dev_get_drvdata(dev); + return sprintf(buf, "%llx\n", iommu->features); +} +static DEVICE_ATTR(features, S_IRUGO, amd_iommu_show_features, NULL); + +static struct attribute *amd_iommu_attrs[] = { + &dev_attr_cap.attr, + &dev_attr_features.attr, + NULL, +}; + +static struct attribute_group amd_iommu_group = { + .name = "amd-iommu", + .attrs = amd_iommu_attrs, +}; + +static const struct attribute_group *amd_iommu_groups[] = { + &amd_iommu_group, + NULL, +}; + +static int iommu_init_pci(struct amd_iommu *iommu) +{ + int cap_ptr = iommu->cap_ptr; + u32 range, misc, low, high; + + iommu->dev = pci_get_bus_and_slot(PCI_BUS_NUM(iommu->devid), + iommu->devid & 0xff); + if (!iommu->dev) + return -ENODEV; + + pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET, + &iommu->cap); + pci_read_config_dword(iommu->dev, cap_ptr + MMIO_RANGE_OFFSET, + &range); + pci_read_config_dword(iommu->dev, cap_ptr + MMIO_MISC_OFFSET, + &misc); + + iommu->first_device = PCI_DEVID(MMIO_GET_BUS(range), + MMIO_GET_FD(range)); + iommu->last_device = PCI_DEVID(MMIO_GET_BUS(range), + MMIO_GET_LD(range)); + + if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB))) + amd_iommu_iotlb_sup = false; + + /* read extended feature bits */ + low = readl(iommu->mmio_base + MMIO_EXT_FEATURES); + high = readl(iommu->mmio_base + MMIO_EXT_FEATURES + 4); + + iommu->features = ((u64)high << 32) | low; + + if (iommu_feature(iommu, FEATURE_GT)) { + int glxval; + u32 max_pasid; + u64 pasmax; + + pasmax = iommu->features & FEATURE_PASID_MASK; + pasmax >>= FEATURE_PASID_SHIFT; + max_pasid = (1 << (pasmax + 1)) - 1; + + amd_iommu_max_pasid = min(amd_iommu_max_pasid, max_pasid); + + BUG_ON(amd_iommu_max_pasid & ~PASID_MASK); + + glxval = iommu->features & FEATURE_GLXVAL_MASK; + glxval >>= FEATURE_GLXVAL_SHIFT; + + if (amd_iommu_max_glx_val == -1) + amd_iommu_max_glx_val = glxval; + else + amd_iommu_max_glx_val = min(amd_iommu_max_glx_val, glxval); + } + + if (iommu_feature(iommu, FEATURE_GT) && + iommu_feature(iommu, FEATURE_PPR)) { + iommu->is_iommu_v2 = true; + amd_iommu_v2_present = true; + } + + if (iommu_feature(iommu, FEATURE_PPR)) { + iommu->ppr_log = alloc_ppr_log(iommu); + if (!iommu->ppr_log) + return -ENOMEM; + } + + if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) + amd_iommu_np_cache = true; + + init_iommu_perf_ctr(iommu); + + if (is_rd890_iommu(iommu->dev)) { + int i, j; + + iommu->root_pdev = pci_get_bus_and_slot(iommu->dev->bus->number, + PCI_DEVFN(0, 0)); + + /* + * Some rd890 systems may not be fully reconfigured by the + * BIOS, so it's necessary for us to store this information so + * it can be reprogrammed on resume + */ + pci_read_config_dword(iommu->dev, iommu->cap_ptr + 4, + &iommu->stored_addr_lo); + pci_read_config_dword(iommu->dev, iommu->cap_ptr + 8, + &iommu->stored_addr_hi); + + /* Low bit locks writes to configuration space */ + iommu->stored_addr_lo &= ~1; + + for (i = 0; i < 6; i++) + for (j = 0; j < 0x12; j++) + iommu->stored_l1[i][j] = iommu_read_l1(iommu, i, j); + + for (i = 0; i < 0x83; i++) + iommu->stored_l2[i] = iommu_read_l2(iommu, i); + } + + amd_iommu_erratum_746_workaround(iommu); + + iommu->iommu_dev = iommu_device_create(&iommu->dev->dev, iommu, + amd_iommu_groups, "ivhd%d", + iommu->index); + + return pci_enable_device(iommu->dev); +} + +static void print_iommu_info(void) +{ + static const char * const feat_str[] = { + "PreF", "PPR", "X2APIC", "NX", "GT", "[5]", + "IA", "GA", "HE", "PC" + }; + struct amd_iommu *iommu; + + for_each_iommu(iommu) { + int i; + + pr_info("AMD-Vi: Found IOMMU at %s cap 0x%hx\n", + dev_name(&iommu->dev->dev), iommu->cap_ptr); + + if (iommu->cap & (1 << IOMMU_CAP_EFR)) { + pr_info("AMD-Vi: Extended features: "); + for (i = 0; i < ARRAY_SIZE(feat_str); ++i) { + if (iommu_feature(iommu, (1ULL << i))) + pr_cont(" %s", feat_str[i]); + } + pr_cont("\n"); + } + } + if (irq_remapping_enabled) + pr_info("AMD-Vi: Interrupt remapping enabled\n"); +} + +static int __init amd_iommu_init_pci(void) +{ + struct amd_iommu *iommu; + int ret = 0; + + for_each_iommu(iommu) { + ret = iommu_init_pci(iommu); + if (ret) + break; + } + + ret = amd_iommu_init_devices(); + + print_iommu_info(); + + return ret; +} + +/**************************************************************************** + * + * The following functions initialize the MSI interrupts for all IOMMUs + * in the system. It's a bit challenging because there could be multiple + * IOMMUs per PCI BDF but we can call pci_enable_msi(x) only once per + * pci_dev. + * + ****************************************************************************/ + +static int iommu_setup_msi(struct amd_iommu *iommu) +{ + int r; + + r = pci_enable_msi(iommu->dev); + if (r) + return r; + + r = request_threaded_irq(iommu->dev->irq, + amd_iommu_int_handler, + amd_iommu_int_thread, + 0, "AMD-Vi", + iommu); + + if (r) { + pci_disable_msi(iommu->dev); + return r; + } + + iommu->int_enabled = true; + + return 0; +} + +static int iommu_init_msi(struct amd_iommu *iommu) +{ + int ret; + + if (iommu->int_enabled) + goto enable_faults; + + if (iommu->dev->msi_cap) + ret = iommu_setup_msi(iommu); + else + ret = -ENODEV; + + if (ret) + return ret; + +enable_faults: + iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); + + if (iommu->ppr_log != NULL) + iommu_feature_enable(iommu, CONTROL_PPFINT_EN); + + return 0; +} + +/**************************************************************************** + * + * The next functions belong to the third pass of parsing the ACPI + * table. In this last pass the memory mapping requirements are + * gathered (like exclusion and unity mapping ranges). + * + ****************************************************************************/ + +static void __init free_unity_maps(void) +{ + struct unity_map_entry *entry, *next; + + list_for_each_entry_safe(entry, next, &amd_iommu_unity_map, list) { + list_del(&entry->list); + kfree(entry); + } +} + +/* called when we find an exclusion range definition in ACPI */ +static int __init init_exclusion_range(struct ivmd_header *m) +{ + int i; + + switch (m->type) { + case ACPI_IVMD_TYPE: + set_device_exclusion_range(m->devid, m); + break; + case ACPI_IVMD_TYPE_ALL: + for (i = 0; i <= amd_iommu_last_bdf; ++i) + set_device_exclusion_range(i, m); + break; + case ACPI_IVMD_TYPE_RANGE: + for (i = m->devid; i <= m->aux; ++i) + set_device_exclusion_range(i, m); + break; + default: + break; + } + + return 0; +} + +/* called for unity map ACPI definition */ +static int __init init_unity_map_range(struct ivmd_header *m) +{ + struct unity_map_entry *e = NULL; + char *s; + + e = kzalloc(sizeof(*e), GFP_KERNEL); + if (e == NULL) + return -ENOMEM; + + switch (m->type) { + default: + kfree(e); + return 0; + case ACPI_IVMD_TYPE: + s = "IVMD_TYPEi\t\t\t"; + e->devid_start = e->devid_end = m->devid; + break; + case ACPI_IVMD_TYPE_ALL: + s = "IVMD_TYPE_ALL\t\t"; + e->devid_start = 0; + e->devid_end = amd_iommu_last_bdf; + break; + case ACPI_IVMD_TYPE_RANGE: + s = "IVMD_TYPE_RANGE\t\t"; + e->devid_start = m->devid; + e->devid_end = m->aux; + break; + } + e->address_start = PAGE_ALIGN(m->range_start); + e->address_end = e->address_start + PAGE_ALIGN(m->range_length); + e->prot = m->flags >> 1; + + DUMP_printk("%s devid_start: %02x:%02x.%x devid_end: %02x:%02x.%x" + " range_start: %016llx range_end: %016llx flags: %x\n", s, + PCI_BUS_NUM(e->devid_start), PCI_SLOT(e->devid_start), + PCI_FUNC(e->devid_start), PCI_BUS_NUM(e->devid_end), + PCI_SLOT(e->devid_end), PCI_FUNC(e->devid_end), + e->address_start, e->address_end, m->flags); + + list_add_tail(&e->list, &amd_iommu_unity_map); + + return 0; +} + +/* iterates over all memory definitions we find in the ACPI table */ +static int __init init_memory_definitions(struct acpi_table_header *table) +{ + u8 *p = (u8 *)table, *end = (u8 *)table; + struct ivmd_header *m; + + end += table->length; + p += IVRS_HEADER_LENGTH; + + while (p < end) { + m = (struct ivmd_header *)p; + if (m->flags & IVMD_FLAG_EXCL_RANGE) + init_exclusion_range(m); + else if (m->flags & IVMD_FLAG_UNITY_MAP) + init_unity_map_range(m); + + p += m->length; + } + + return 0; +} + +/* + * Init the device table to not allow DMA access for devices and + * suppress all page faults + */ +static void init_device_table_dma(void) +{ + u32 devid; + + for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) { + set_dev_entry_bit(devid, DEV_ENTRY_VALID); + set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION); + } +} + +static void __init uninit_device_table_dma(void) +{ + u32 devid; + + for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) { + amd_iommu_dev_table[devid].data[0] = 0ULL; + amd_iommu_dev_table[devid].data[1] = 0ULL; + } +} + +static void init_device_table(void) +{ + u32 devid; + + if (!amd_iommu_irq_remap) + return; + + for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) + set_dev_entry_bit(devid, DEV_ENTRY_IRQ_TBL_EN); +} + +static void iommu_init_flags(struct amd_iommu *iommu) +{ + iommu->acpi_flags & IVHD_FLAG_HT_TUN_EN_MASK ? + iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) : + iommu_feature_disable(iommu, CONTROL_HT_TUN_EN); + + iommu->acpi_flags & IVHD_FLAG_PASSPW_EN_MASK ? + iommu_feature_enable(iommu, CONTROL_PASSPW_EN) : + iommu_feature_disable(iommu, CONTROL_PASSPW_EN); + + iommu->acpi_flags & IVHD_FLAG_RESPASSPW_EN_MASK ? + iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) : + iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN); + + iommu->acpi_flags & IVHD_FLAG_ISOC_EN_MASK ? + iommu_feature_enable(iommu, CONTROL_ISOC_EN) : + iommu_feature_disable(iommu, CONTROL_ISOC_EN); + + /* + * make IOMMU memory accesses cache coherent + */ + iommu_feature_enable(iommu, CONTROL_COHERENT_EN); + + /* Set IOTLB invalidation timeout to 1s */ + iommu_set_inv_tlb_timeout(iommu, CTRL_INV_TO_1S); +} + +static void iommu_apply_resume_quirks(struct amd_iommu *iommu) +{ + int i, j; + u32 ioc_feature_control; + struct pci_dev *pdev = iommu->root_pdev; + + /* RD890 BIOSes may not have completely reconfigured the iommu */ + if (!is_rd890_iommu(iommu->dev) || !pdev) + return; + + /* + * First, we need to ensure that the iommu is enabled. This is + * controlled by a register in the northbridge + */ + + /* Select Northbridge indirect register 0x75 and enable writing */ + pci_write_config_dword(pdev, 0x60, 0x75 | (1 << 7)); + pci_read_config_dword(pdev, 0x64, &ioc_feature_control); + + /* Enable the iommu */ + if (!(ioc_feature_control & 0x1)) + pci_write_config_dword(pdev, 0x64, ioc_feature_control | 1); + + /* Restore the iommu BAR */ + pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4, + iommu->stored_addr_lo); + pci_write_config_dword(iommu->dev, iommu->cap_ptr + 8, + iommu->stored_addr_hi); + + /* Restore the l1 indirect regs for each of the 6 l1s */ + for (i = 0; i < 6; i++) + for (j = 0; j < 0x12; j++) + iommu_write_l1(iommu, i, j, iommu->stored_l1[i][j]); + + /* Restore the l2 indirect regs */ + for (i = 0; i < 0x83; i++) + iommu_write_l2(iommu, i, iommu->stored_l2[i]); + + /* Lock PCI setup registers */ + pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4, + iommu->stored_addr_lo | 1); +} + +/* + * This function finally enables all IOMMUs found in the system after + * they have been initialized + */ +static void early_enable_iommus(void) +{ + struct amd_iommu *iommu; + + for_each_iommu(iommu) { + iommu_disable(iommu); + iommu_init_flags(iommu); + iommu_set_device_table(iommu); + iommu_enable_command_buffer(iommu); + iommu_enable_event_buffer(iommu); + iommu_set_exclusion_range(iommu); + iommu_enable(iommu); + iommu_flush_all_caches(iommu); + } +} + +static void enable_iommus_v2(void) +{ + struct amd_iommu *iommu; + + for_each_iommu(iommu) { + iommu_enable_ppr_log(iommu); + iommu_enable_gt(iommu); + } +} + +static void enable_iommus(void) +{ + early_enable_iommus(); + + enable_iommus_v2(); +} + +static void disable_iommus(void) +{ + struct amd_iommu *iommu; + + for_each_iommu(iommu) + iommu_disable(iommu); +} + +/* + * Suspend/Resume support + * disable suspend until real resume implemented + */ + +static void amd_iommu_resume(void) +{ + struct amd_iommu *iommu; + + for_each_iommu(iommu) + iommu_apply_resume_quirks(iommu); + + /* re-load the hardware */ + enable_iommus(); + + amd_iommu_enable_interrupts(); +} + +static int amd_iommu_suspend(void) +{ + /* disable IOMMUs to go out of the way for BIOS */ + disable_iommus(); + + return 0; +} + +static struct syscore_ops amd_iommu_syscore_ops = { + .suspend = amd_iommu_suspend, + .resume = amd_iommu_resume, +}; + +static void __init free_on_init_error(void) +{ + free_pages((unsigned long)irq_lookup_table, + get_order(rlookup_table_size)); + + if (amd_iommu_irq_cache) { + kmem_cache_destroy(amd_iommu_irq_cache); + amd_iommu_irq_cache = NULL; + + } + + free_pages((unsigned long)amd_iommu_rlookup_table, + get_order(rlookup_table_size)); + + free_pages((unsigned long)amd_iommu_alias_table, + get_order(alias_table_size)); + + free_pages((unsigned long)amd_iommu_dev_table, + get_order(dev_table_size)); + + free_iommu_all(); + +#ifdef CONFIG_GART_IOMMU + /* + * We failed to initialize the AMD IOMMU - try fallback to GART + * if possible. + */ + gart_iommu_init(); + +#endif +} + +/* SB IOAPIC is always on this device in AMD systems */ +#define IOAPIC_SB_DEVID ((0x00 << 8) | PCI_DEVFN(0x14, 0)) + +static bool __init check_ioapic_information(void) +{ + const char *fw_bug = FW_BUG; + bool ret, has_sb_ioapic; + int idx; + + has_sb_ioapic = false; + ret = false; + + /* + * If we have map overrides on the kernel command line the + * messages in this function might not describe firmware bugs + * anymore - so be careful + */ + if (cmdline_maps) + fw_bug = ""; + + for (idx = 0; idx < nr_ioapics; idx++) { + int devid, id = mpc_ioapic_id(idx); + + devid = get_ioapic_devid(id); + if (devid < 0) { + pr_err("%sAMD-Vi: IOAPIC[%d] not in IVRS table\n", + fw_bug, id); + ret = false; + } else if (devid == IOAPIC_SB_DEVID) { + has_sb_ioapic = true; + ret = true; + } + } + + if (!has_sb_ioapic) { + /* + * We expect the SB IOAPIC to be listed in the IVRS + * table. The system timer is connected to the SB IOAPIC + * and if we don't have it in the list the system will + * panic at boot time. This situation usually happens + * when the BIOS is buggy and provides us the wrong + * device id for the IOAPIC in the system. + */ + pr_err("%sAMD-Vi: No southbridge IOAPIC found\n", fw_bug); + } + + if (!ret) + pr_err("AMD-Vi: Disabling interrupt remapping\n"); + + return ret; +} + +static void __init free_dma_resources(void) +{ + amd_iommu_uninit_devices(); + + free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, + get_order(MAX_DOMAIN_ID/8)); + + free_unity_maps(); +} + +/* + * This is the hardware init function for AMD IOMMU in the system. + * This function is called either from amd_iommu_init or from the interrupt + * remapping setup code. + * + * This function basically parses the ACPI table for AMD IOMMU (IVRS) + * three times: + * + * 1 pass) Find the highest PCI device id the driver has to handle. + * Upon this information the size of the data structures is + * determined that needs to be allocated. + * + * 2 pass) Initialize the data structures just allocated with the + * information in the ACPI table about available AMD IOMMUs + * in the system. It also maps the PCI devices in the + * system to specific IOMMUs + * + * 3 pass) After the basic data structures are allocated and + * initialized we update them with information about memory + * remapping requirements parsed out of the ACPI table in + * this last pass. + * + * After everything is set up the IOMMUs are enabled and the necessary + * hotplug and suspend notifiers are registered. + */ +static int __init early_amd_iommu_init(void) +{ + struct acpi_table_header *ivrs_base; + acpi_size ivrs_size; + acpi_status status; + int i, ret = 0; + + if (!amd_iommu_detected) + return -ENODEV; + + status = acpi_get_table_with_size("IVRS", 0, &ivrs_base, &ivrs_size); + if (status == AE_NOT_FOUND) + return -ENODEV; + else if (ACPI_FAILURE(status)) { + const char *err = acpi_format_exception(status); + pr_err("AMD-Vi: IVRS table error: %s\n", err); + return -EINVAL; + } + + /* + * First parse ACPI tables to find the largest Bus/Dev/Func + * we need to handle. Upon this information the shared data + * structures for the IOMMUs in the system will be allocated + */ + ret = find_last_devid_acpi(ivrs_base); + if (ret) + goto out; + + dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE); + alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE); + rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE); + + /* Device table - directly used by all IOMMUs */ + ret = -ENOMEM; + amd_iommu_dev_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(dev_table_size)); + if (amd_iommu_dev_table == NULL) + goto out; + + /* + * Alias table - map PCI Bus/Dev/Func to Bus/Dev/Func the + * IOMMU see for that device + */ + amd_iommu_alias_table = (void *)__get_free_pages(GFP_KERNEL, + get_order(alias_table_size)); + if (amd_iommu_alias_table == NULL) + goto out; + + /* IOMMU rlookup table - find the IOMMU for a specific device */ + amd_iommu_rlookup_table = (void *)__get_free_pages( + GFP_KERNEL | __GFP_ZERO, + get_order(rlookup_table_size)); + if (amd_iommu_rlookup_table == NULL) + goto out; + + amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages( + GFP_KERNEL | __GFP_ZERO, + get_order(MAX_DOMAIN_ID/8)); + if (amd_iommu_pd_alloc_bitmap == NULL) + goto out; + + /* + * let all alias entries point to itself + */ + for (i = 0; i <= amd_iommu_last_bdf; ++i) + amd_iommu_alias_table[i] = i; + + /* + * never allocate domain 0 because its used as the non-allocated and + * error value placeholder + */ + amd_iommu_pd_alloc_bitmap[0] = 1; + + spin_lock_init(&amd_iommu_pd_lock); + + /* + * now the data structures are allocated and basically initialized + * start the real acpi table scan + */ + ret = init_iommu_all(ivrs_base); + if (ret) + goto out; + + if (amd_iommu_irq_remap) + amd_iommu_irq_remap = check_ioapic_information(); + + if (amd_iommu_irq_remap) { + /* + * Interrupt remapping enabled, create kmem_cache for the + * remapping tables. + */ + ret = -ENOMEM; + amd_iommu_irq_cache = kmem_cache_create("irq_remap_cache", + MAX_IRQS_PER_TABLE * sizeof(u32), + IRQ_TABLE_ALIGNMENT, + 0, NULL); + if (!amd_iommu_irq_cache) + goto out; + + irq_lookup_table = (void *)__get_free_pages( + GFP_KERNEL | __GFP_ZERO, + get_order(rlookup_table_size)); + if (!irq_lookup_table) + goto out; + } + + ret = init_memory_definitions(ivrs_base); + if (ret) + goto out; + + /* init the device table */ + init_device_table(); + +out: + /* Don't leak any ACPI memory */ + early_acpi_os_unmap_memory((char __iomem *)ivrs_base, ivrs_size); + ivrs_base = NULL; + + return ret; +} + +static int amd_iommu_enable_interrupts(void) +{ + struct amd_iommu *iommu; + int ret = 0; + + for_each_iommu(iommu) { + ret = iommu_init_msi(iommu); + if (ret) + goto out; + } + +out: + return ret; +} + +static bool detect_ivrs(void) +{ + struct acpi_table_header *ivrs_base; + acpi_size ivrs_size; + acpi_status status; + + status = acpi_get_table_with_size("IVRS", 0, &ivrs_base, &ivrs_size); + if (status == AE_NOT_FOUND) + return false; + else if (ACPI_FAILURE(status)) { + const char *err = acpi_format_exception(status); + pr_err("AMD-Vi: IVRS table error: %s\n", err); + return false; + } + + early_acpi_os_unmap_memory((char __iomem *)ivrs_base, ivrs_size); + + /* Make sure ACS will be enabled during PCI probe */ + pci_request_acs(); + + return true; +} + +static int amd_iommu_init_dma(void) +{ + struct amd_iommu *iommu; + int ret; + + if (iommu_pass_through) + ret = amd_iommu_init_passthrough(); + else + ret = amd_iommu_init_dma_ops(); + + if (ret) + return ret; + + init_device_table_dma(); + + for_each_iommu(iommu) + iommu_flush_all_caches(iommu); + + amd_iommu_init_api(); + + amd_iommu_init_notifier(); + + return 0; +} + +/**************************************************************************** + * + * AMD IOMMU Initialization State Machine + * + ****************************************************************************/ + +static int __init state_next(void) +{ + int ret = 0; + + switch (init_state) { + case IOMMU_START_STATE: + if (!detect_ivrs()) { + init_state = IOMMU_NOT_FOUND; + ret = -ENODEV; + } else { + init_state = IOMMU_IVRS_DETECTED; + } + break; + case IOMMU_IVRS_DETECTED: + ret = early_amd_iommu_init(); + init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED; + break; + case IOMMU_ACPI_FINISHED: + early_enable_iommus(); + register_syscore_ops(&amd_iommu_syscore_ops); + x86_platform.iommu_shutdown = disable_iommus; + init_state = IOMMU_ENABLED; + break; + case IOMMU_ENABLED: + ret = amd_iommu_init_pci(); + init_state = ret ? IOMMU_INIT_ERROR : IOMMU_PCI_INIT; + enable_iommus_v2(); + break; + case IOMMU_PCI_INIT: + ret = amd_iommu_enable_interrupts(); + init_state = ret ? IOMMU_INIT_ERROR : IOMMU_INTERRUPTS_EN; + break; + case IOMMU_INTERRUPTS_EN: + ret = amd_iommu_init_dma(); + init_state = ret ? IOMMU_INIT_ERROR : IOMMU_DMA_OPS; + break; + case IOMMU_DMA_OPS: + init_state = IOMMU_INITIALIZED; + break; + case IOMMU_INITIALIZED: + /* Nothing to do */ + break; + case IOMMU_NOT_FOUND: + case IOMMU_INIT_ERROR: + /* Error states => do nothing */ + ret = -EINVAL; + break; + default: + /* Unknown state */ + BUG(); + } + + return ret; +} + +static int __init iommu_go_to_state(enum iommu_init_state state) +{ + int ret = 0; + + while (init_state != state) { + ret = state_next(); + if (init_state == IOMMU_NOT_FOUND || + init_state == IOMMU_INIT_ERROR) + break; + } + + return ret; +} + +#ifdef CONFIG_IRQ_REMAP +int __init amd_iommu_prepare(void) +{ + int ret; + + amd_iommu_irq_remap = true; + + ret = iommu_go_to_state(IOMMU_ACPI_FINISHED); + if (ret) + return ret; + return amd_iommu_irq_remap ? 0 : -ENODEV; +} + +int __init amd_iommu_enable(void) +{ + int ret; + + ret = iommu_go_to_state(IOMMU_ENABLED); + if (ret) + return ret; + + irq_remapping_enabled = 1; + + return 0; +} + +void amd_iommu_disable(void) +{ + amd_iommu_suspend(); +} + +int amd_iommu_reenable(int mode) +{ + amd_iommu_resume(); + + return 0; +} + +int __init amd_iommu_enable_faulting(void) +{ + /* We enable MSI later when PCI is initialized */ + return 0; +} +#endif + +/* + * This is the core init function for AMD IOMMU hardware in the system. + * This function is called from the generic x86 DMA layer initialization + * code. + */ +static int __init amd_iommu_init(void) +{ + int ret; + + ret = iommu_go_to_state(IOMMU_INITIALIZED); + if (ret) { + free_dma_resources(); + if (!irq_remapping_enabled) { + disable_iommus(); + free_on_init_error(); + } else { + struct amd_iommu *iommu; + + uninit_device_table_dma(); + for_each_iommu(iommu) + iommu_flush_all_caches(iommu); + } + } + + return ret; +} + +/**************************************************************************** + * + * Early detect code. This code runs at IOMMU detection time in the DMA + * layer. It just looks if there is an IVRS ACPI table to detect AMD + * IOMMUs + * + ****************************************************************************/ +int __init amd_iommu_detect(void) +{ + int ret; + + if (no_iommu || (iommu_detected && !gart_iommu_aperture)) + return -ENODEV; + + if (amd_iommu_disabled) + return -ENODEV; + + ret = iommu_go_to_state(IOMMU_IVRS_DETECTED); + if (ret) + return ret; + + amd_iommu_detected = true; + iommu_detected = 1; + x86_init.iommu.iommu_init = amd_iommu_init; + + return 0; +} + +/**************************************************************************** + * + * Parsing functions for the AMD IOMMU specific kernel command line + * options. + * + ****************************************************************************/ + +static int __init parse_amd_iommu_dump(char *str) +{ + amd_iommu_dump = true; + + return 1; +} + +static int __init parse_amd_iommu_options(char *str) +{ + for (; *str; ++str) { + if (strncmp(str, "fullflush", 9) == 0) + amd_iommu_unmap_flush = true; + if (strncmp(str, "off", 3) == 0) + amd_iommu_disabled = true; + if (strncmp(str, "force_isolation", 15) == 0) + amd_iommu_force_isolation = true; + } + + return 1; +} + +static int __init parse_ivrs_ioapic(char *str) +{ + unsigned int bus, dev, fn; + int ret, id, i; + u16 devid; + + ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn); + + if (ret != 4) { + pr_err("AMD-Vi: Invalid command line: ivrs_ioapic%s\n", str); + return 1; + } + + if (early_ioapic_map_size == EARLY_MAP_SIZE) { + pr_err("AMD-Vi: Early IOAPIC map overflow - ignoring ivrs_ioapic%s\n", + str); + return 1; + } + + devid = ((bus & 0xff) << 8) | ((dev & 0x1f) << 3) | (fn & 0x7); + + cmdline_maps = true; + i = early_ioapic_map_size++; + early_ioapic_map[i].id = id; + early_ioapic_map[i].devid = devid; + early_ioapic_map[i].cmd_line = true; + + return 1; +} + +static int __init parse_ivrs_hpet(char *str) +{ + unsigned int bus, dev, fn; + int ret, id, i; + u16 devid; + + ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn); + + if (ret != 4) { + pr_err("AMD-Vi: Invalid command line: ivrs_hpet%s\n", str); + return 1; + } + + if (early_hpet_map_size == EARLY_MAP_SIZE) { + pr_err("AMD-Vi: Early HPET map overflow - ignoring ivrs_hpet%s\n", + str); + return 1; + } + + devid = ((bus & 0xff) << 8) | ((dev & 0x1f) << 3) | (fn & 0x7); + + cmdline_maps = true; + i = early_hpet_map_size++; + early_hpet_map[i].id = id; + early_hpet_map[i].devid = devid; + early_hpet_map[i].cmd_line = true; + + return 1; +} + +__setup("amd_iommu_dump", parse_amd_iommu_dump); +__setup("amd_iommu=", parse_amd_iommu_options); +__setup("ivrs_ioapic", parse_ivrs_ioapic); +__setup("ivrs_hpet", parse_ivrs_hpet); + +IOMMU_INIT_FINISH(amd_iommu_detect, + gart_iommu_hole_init, + NULL, + NULL); + +bool amd_iommu_v2_supported(void) +{ + return amd_iommu_v2_present; +} +EXPORT_SYMBOL(amd_iommu_v2_supported); + +/**************************************************************************** + * + * IOMMU EFR Performance Counter support functionality. This code allows + * access to the IOMMU PC functionality. + * + ****************************************************************************/ + +u8 amd_iommu_pc_get_max_banks(u16 devid) +{ + struct amd_iommu *iommu; + u8 ret = 0; + + /* locate the iommu governing the devid */ + iommu = amd_iommu_rlookup_table[devid]; + if (iommu) + ret = iommu->max_banks; + + return ret; +} +EXPORT_SYMBOL(amd_iommu_pc_get_max_banks); + +bool amd_iommu_pc_supported(void) +{ + return amd_iommu_pc_present; +} +EXPORT_SYMBOL(amd_iommu_pc_supported); + +u8 amd_iommu_pc_get_max_counters(u16 devid) +{ + struct amd_iommu *iommu; + u8 ret = 0; + + /* locate the iommu governing the devid */ + iommu = amd_iommu_rlookup_table[devid]; + if (iommu) + ret = iommu->max_counters; + + return ret; +} +EXPORT_SYMBOL(amd_iommu_pc_get_max_counters); + +int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn, + u64 *value, bool is_write) +{ + struct amd_iommu *iommu; + u32 offset; + u32 max_offset_lim; + + /* Make sure the IOMMU PC resource is available */ + if (!amd_iommu_pc_present) + return -ENODEV; + + /* Locate the iommu associated with the device ID */ + iommu = amd_iommu_rlookup_table[devid]; + + /* Check for valid iommu and pc register indexing */ + if (WARN_ON((iommu == NULL) || (fxn > 0x28) || (fxn & 7))) + return -ENODEV; + + offset = (u32)(((0x40|bank) << 12) | (cntr << 8) | fxn); + + /* Limit the offset to the hw defined mmio region aperture */ + max_offset_lim = (u32)(((0x40|iommu->max_banks) << 12) | + (iommu->max_counters << 8) | 0x28); + if ((offset < MMIO_CNTR_REG_OFFSET) || + (offset > max_offset_lim)) + return -EINVAL; + + if (is_write) { + writel((u32)*value, iommu->mmio_base + offset); + writel((*value >> 32), iommu->mmio_base + offset + 4); + } else { + *value = readl(iommu->mmio_base + offset + 4); + *value <<= 32; + *value = readl(iommu->mmio_base + offset); + } + + return 0; +} +EXPORT_SYMBOL(amd_iommu_pc_get_set_reg_val); diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h new file mode 100644 index 000000000..72b0fd455 --- /dev/null +++ b/drivers/iommu/amd_iommu_proto.h @@ -0,0 +1,92 @@ +/* + * Copyright (C) 2009-2010 Advanced Micro Devices, Inc. + * Author: Joerg Roedel <jroedel@suse.de> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _ASM_X86_AMD_IOMMU_PROTO_H +#define _ASM_X86_AMD_IOMMU_PROTO_H + +#include "amd_iommu_types.h" + +extern int amd_iommu_init_dma_ops(void); +extern int amd_iommu_init_passthrough(void); +extern irqreturn_t amd_iommu_int_thread(int irq, void *data); +extern irqreturn_t amd_iommu_int_handler(int irq, void *data); +extern void amd_iommu_apply_erratum_63(u16 devid); +extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu); +extern int amd_iommu_init_devices(void); +extern void amd_iommu_uninit_devices(void); +extern void amd_iommu_init_notifier(void); +extern void amd_iommu_init_api(void); + +/* Needed for interrupt remapping */ +extern int amd_iommu_prepare(void); +extern int amd_iommu_enable(void); +extern void amd_iommu_disable(void); +extern int amd_iommu_reenable(int); +extern int amd_iommu_enable_faulting(void); + +/* IOMMUv2 specific functions */ +struct iommu_domain; + +extern bool amd_iommu_v2_supported(void); +extern int amd_iommu_register_ppr_notifier(struct notifier_block *nb); +extern int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb); +extern void amd_iommu_domain_direct_map(struct iommu_domain *dom); +extern int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids); +extern int amd_iommu_flush_page(struct iommu_domain *dom, int pasid, + u64 address); +extern int amd_iommu_flush_tlb(struct iommu_domain *dom, int pasid); +extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid, + unsigned long cr3); +extern int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, int pasid); +extern struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev); + +/* IOMMU Performance Counter functions */ +extern bool amd_iommu_pc_supported(void); +extern u8 amd_iommu_pc_get_max_banks(u16 devid); +extern u8 amd_iommu_pc_get_max_counters(u16 devid); +extern int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn, + u64 *value, bool is_write); + +#define PPR_SUCCESS 0x0 +#define PPR_INVALID 0x1 +#define PPR_FAILURE 0xf + +extern int amd_iommu_complete_ppr(struct pci_dev *pdev, int pasid, + int status, int tag); + +#ifndef CONFIG_AMD_IOMMU_STATS + +static inline void amd_iommu_stats_init(void) { } + +#endif /* !CONFIG_AMD_IOMMU_STATS */ + +static inline bool is_rd890_iommu(struct pci_dev *pdev) +{ + return (pdev->vendor == PCI_VENDOR_ID_ATI) && + (pdev->device == PCI_DEVICE_ID_RD890_IOMMU); +} + +static inline bool iommu_feature(struct amd_iommu *iommu, u64 f) +{ + if (!(iommu->cap & (1 << IOMMU_CAP_EFR))) + return false; + + return !!(iommu->features & f); +} + +#endif /* _ASM_X86_AMD_IOMMU_PROTO_H */ diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h new file mode 100644 index 000000000..05030e523 --- /dev/null +++ b/drivers/iommu/amd_iommu_types.h @@ -0,0 +1,751 @@ +/* + * Copyright (C) 2007-2010 Advanced Micro Devices, Inc. + * Author: Joerg Roedel <jroedel@suse.de> + * Leo Duran <leo.duran@amd.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _ASM_X86_AMD_IOMMU_TYPES_H +#define _ASM_X86_AMD_IOMMU_TYPES_H + +#include <linux/types.h> +#include <linux/mutex.h> +#include <linux/list.h> +#include <linux/spinlock.h> +#include <linux/pci.h> +#include <linux/irqreturn.h> + +/* + * Maximum number of IOMMUs supported + */ +#define MAX_IOMMUS 32 + +/* + * some size calculation constants + */ +#define DEV_TABLE_ENTRY_SIZE 32 +#define ALIAS_TABLE_ENTRY_SIZE 2 +#define RLOOKUP_TABLE_ENTRY_SIZE (sizeof(void *)) + +/* Capability offsets used by the driver */ +#define MMIO_CAP_HDR_OFFSET 0x00 +#define MMIO_RANGE_OFFSET 0x0c +#define MMIO_MISC_OFFSET 0x10 + +/* Masks, shifts and macros to parse the device range capability */ +#define MMIO_RANGE_LD_MASK 0xff000000 +#define MMIO_RANGE_FD_MASK 0x00ff0000 +#define MMIO_RANGE_BUS_MASK 0x0000ff00 +#define MMIO_RANGE_LD_SHIFT 24 +#define MMIO_RANGE_FD_SHIFT 16 +#define MMIO_RANGE_BUS_SHIFT 8 +#define MMIO_GET_LD(x) (((x) & MMIO_RANGE_LD_MASK) >> MMIO_RANGE_LD_SHIFT) +#define MMIO_GET_FD(x) (((x) & MMIO_RANGE_FD_MASK) >> MMIO_RANGE_FD_SHIFT) +#define MMIO_GET_BUS(x) (((x) & MMIO_RANGE_BUS_MASK) >> MMIO_RANGE_BUS_SHIFT) +#define MMIO_MSI_NUM(x) ((x) & 0x1f) + +/* Flag masks for the AMD IOMMU exclusion range */ +#define MMIO_EXCL_ENABLE_MASK 0x01ULL +#define MMIO_EXCL_ALLOW_MASK 0x02ULL + +/* Used offsets into the MMIO space */ +#define MMIO_DEV_TABLE_OFFSET 0x0000 +#define MMIO_CMD_BUF_OFFSET 0x0008 +#define MMIO_EVT_BUF_OFFSET 0x0010 +#define MMIO_CONTROL_OFFSET 0x0018 +#define MMIO_EXCL_BASE_OFFSET 0x0020 +#define MMIO_EXCL_LIMIT_OFFSET 0x0028 +#define MMIO_EXT_FEATURES 0x0030 +#define MMIO_PPR_LOG_OFFSET 0x0038 +#define MMIO_CMD_HEAD_OFFSET 0x2000 +#define MMIO_CMD_TAIL_OFFSET 0x2008 +#define MMIO_EVT_HEAD_OFFSET 0x2010 +#define MMIO_EVT_TAIL_OFFSET 0x2018 +#define MMIO_STATUS_OFFSET 0x2020 +#define MMIO_PPR_HEAD_OFFSET 0x2030 +#define MMIO_PPR_TAIL_OFFSET 0x2038 +#define MMIO_CNTR_CONF_OFFSET 0x4000 +#define MMIO_CNTR_REG_OFFSET 0x40000 +#define MMIO_REG_END_OFFSET 0x80000 + + + +/* Extended Feature Bits */ +#define FEATURE_PREFETCH (1ULL<<0) +#define FEATURE_PPR (1ULL<<1) +#define FEATURE_X2APIC (1ULL<<2) +#define FEATURE_NX (1ULL<<3) +#define FEATURE_GT (1ULL<<4) +#define FEATURE_IA (1ULL<<6) +#define FEATURE_GA (1ULL<<7) +#define FEATURE_HE (1ULL<<8) +#define FEATURE_PC (1ULL<<9) + +#define FEATURE_PASID_SHIFT 32 +#define FEATURE_PASID_MASK (0x1fULL << FEATURE_PASID_SHIFT) + +#define FEATURE_GLXVAL_SHIFT 14 +#define FEATURE_GLXVAL_MASK (0x03ULL << FEATURE_GLXVAL_SHIFT) + +/* Note: + * The current driver only support 16-bit PASID. + * Currently, hardware only implement upto 16-bit PASID + * even though the spec says it could have upto 20 bits. + */ +#define PASID_MASK 0x0000ffff + +/* MMIO status bits */ +#define MMIO_STATUS_EVT_INT_MASK (1 << 1) +#define MMIO_STATUS_COM_WAIT_INT_MASK (1 << 2) +#define MMIO_STATUS_PPR_INT_MASK (1 << 6) + +/* event logging constants */ +#define EVENT_ENTRY_SIZE 0x10 +#define EVENT_TYPE_SHIFT 28 +#define EVENT_TYPE_MASK 0xf +#define EVENT_TYPE_ILL_DEV 0x1 +#define EVENT_TYPE_IO_FAULT 0x2 +#define EVENT_TYPE_DEV_TAB_ERR 0x3 +#define EVENT_TYPE_PAGE_TAB_ERR 0x4 +#define EVENT_TYPE_ILL_CMD 0x5 +#define EVENT_TYPE_CMD_HARD_ERR 0x6 +#define EVENT_TYPE_IOTLB_INV_TO 0x7 +#define EVENT_TYPE_INV_DEV_REQ 0x8 +#define EVENT_DEVID_MASK 0xffff +#define EVENT_DEVID_SHIFT 0 +#define EVENT_DOMID_MASK 0xffff +#define EVENT_DOMID_SHIFT 0 +#define EVENT_FLAGS_MASK 0xfff +#define EVENT_FLAGS_SHIFT 0x10 + +/* feature control bits */ +#define CONTROL_IOMMU_EN 0x00ULL +#define CONTROL_HT_TUN_EN 0x01ULL +#define CONTROL_EVT_LOG_EN 0x02ULL +#define CONTROL_EVT_INT_EN 0x03ULL +#define CONTROL_COMWAIT_EN 0x04ULL +#define CONTROL_INV_TIMEOUT 0x05ULL +#define CONTROL_PASSPW_EN 0x08ULL +#define CONTROL_RESPASSPW_EN 0x09ULL +#define CONTROL_COHERENT_EN 0x0aULL +#define CONTROL_ISOC_EN 0x0bULL +#define CONTROL_CMDBUF_EN 0x0cULL +#define CONTROL_PPFLOG_EN 0x0dULL +#define CONTROL_PPFINT_EN 0x0eULL +#define CONTROL_PPR_EN 0x0fULL +#define CONTROL_GT_EN 0x10ULL + +#define CTRL_INV_TO_MASK (7 << CONTROL_INV_TIMEOUT) +#define CTRL_INV_TO_NONE 0 +#define CTRL_INV_TO_1MS 1 +#define CTRL_INV_TO_10MS 2 +#define CTRL_INV_TO_100MS 3 +#define CTRL_INV_TO_1S 4 +#define CTRL_INV_TO_10S 5 +#define CTRL_INV_TO_100S 6 + +/* command specific defines */ +#define CMD_COMPL_WAIT 0x01 +#define CMD_INV_DEV_ENTRY 0x02 +#define CMD_INV_IOMMU_PAGES 0x03 +#define CMD_INV_IOTLB_PAGES 0x04 +#define CMD_INV_IRT 0x05 +#define CMD_COMPLETE_PPR 0x07 +#define CMD_INV_ALL 0x08 + +#define CMD_COMPL_WAIT_STORE_MASK 0x01 +#define CMD_COMPL_WAIT_INT_MASK 0x02 +#define CMD_INV_IOMMU_PAGES_SIZE_MASK 0x01 +#define CMD_INV_IOMMU_PAGES_PDE_MASK 0x02 +#define CMD_INV_IOMMU_PAGES_GN_MASK 0x04 + +#define PPR_STATUS_MASK 0xf +#define PPR_STATUS_SHIFT 12 + +#define CMD_INV_IOMMU_ALL_PAGES_ADDRESS 0x7fffffffffffffffULL + +/* macros and definitions for device table entries */ +#define DEV_ENTRY_VALID 0x00 +#define DEV_ENTRY_TRANSLATION 0x01 +#define DEV_ENTRY_IR 0x3d +#define DEV_ENTRY_IW 0x3e +#define DEV_ENTRY_NO_PAGE_FAULT 0x62 +#define DEV_ENTRY_EX 0x67 +#define DEV_ENTRY_SYSMGT1 0x68 +#define DEV_ENTRY_SYSMGT2 0x69 +#define DEV_ENTRY_IRQ_TBL_EN 0x80 +#define DEV_ENTRY_INIT_PASS 0xb8 +#define DEV_ENTRY_EINT_PASS 0xb9 +#define DEV_ENTRY_NMI_PASS 0xba +#define DEV_ENTRY_LINT0_PASS 0xbe +#define DEV_ENTRY_LINT1_PASS 0xbf +#define DEV_ENTRY_MODE_MASK 0x07 +#define DEV_ENTRY_MODE_SHIFT 0x09 + +#define MAX_DEV_TABLE_ENTRIES 0xffff + +/* constants to configure the command buffer */ +#define CMD_BUFFER_SIZE 8192 +#define CMD_BUFFER_UNINITIALIZED 1 +#define CMD_BUFFER_ENTRIES 512 +#define MMIO_CMD_SIZE_SHIFT 56 +#define MMIO_CMD_SIZE_512 (0x9ULL << MMIO_CMD_SIZE_SHIFT) + +/* constants for event buffer handling */ +#define EVT_BUFFER_SIZE 8192 /* 512 entries */ +#define EVT_LEN_MASK (0x9ULL << 56) + +/* Constants for PPR Log handling */ +#define PPR_LOG_ENTRIES 512 +#define PPR_LOG_SIZE_SHIFT 56 +#define PPR_LOG_SIZE_512 (0x9ULL << PPR_LOG_SIZE_SHIFT) +#define PPR_ENTRY_SIZE 16 +#define PPR_LOG_SIZE (PPR_ENTRY_SIZE * PPR_LOG_ENTRIES) + +#define PPR_REQ_TYPE(x) (((x) >> 60) & 0xfULL) +#define PPR_FLAGS(x) (((x) >> 48) & 0xfffULL) +#define PPR_DEVID(x) ((x) & 0xffffULL) +#define PPR_TAG(x) (((x) >> 32) & 0x3ffULL) +#define PPR_PASID1(x) (((x) >> 16) & 0xffffULL) +#define PPR_PASID2(x) (((x) >> 42) & 0xfULL) +#define PPR_PASID(x) ((PPR_PASID2(x) << 16) | PPR_PASID1(x)) + +#define PPR_REQ_FAULT 0x01 + +#define PAGE_MODE_NONE 0x00 +#define PAGE_MODE_1_LEVEL 0x01 +#define PAGE_MODE_2_LEVEL 0x02 +#define PAGE_MODE_3_LEVEL 0x03 +#define PAGE_MODE_4_LEVEL 0x04 +#define PAGE_MODE_5_LEVEL 0x05 +#define PAGE_MODE_6_LEVEL 0x06 + +#define PM_LEVEL_SHIFT(x) (12 + ((x) * 9)) +#define PM_LEVEL_SIZE(x) (((x) < 6) ? \ + ((1ULL << PM_LEVEL_SHIFT((x))) - 1): \ + (0xffffffffffffffffULL)) +#define PM_LEVEL_INDEX(x, a) (((a) >> PM_LEVEL_SHIFT((x))) & 0x1ffULL) +#define PM_LEVEL_ENC(x) (((x) << 9) & 0xe00ULL) +#define PM_LEVEL_PDE(x, a) ((a) | PM_LEVEL_ENC((x)) | \ + IOMMU_PTE_P | IOMMU_PTE_IR | IOMMU_PTE_IW) +#define PM_PTE_LEVEL(pte) (((pte) >> 9) & 0x7ULL) + +#define PM_MAP_4k 0 +#define PM_ADDR_MASK 0x000ffffffffff000ULL +#define PM_MAP_MASK(lvl) (PM_ADDR_MASK & \ + (~((1ULL << (12 + ((lvl) * 9))) - 1))) +#define PM_ALIGNED(lvl, addr) ((PM_MAP_MASK(lvl) & (addr)) == (addr)) + +/* + * Returns the page table level to use for a given page size + * Pagesize is expected to be a power-of-two + */ +#define PAGE_SIZE_LEVEL(pagesize) \ + ((__ffs(pagesize) - 12) / 9) +/* + * Returns the number of ptes to use for a given page size + * Pagesize is expected to be a power-of-two + */ +#define PAGE_SIZE_PTE_COUNT(pagesize) \ + (1ULL << ((__ffs(pagesize) - 12) % 9)) + +/* + * Aligns a given io-virtual address to a given page size + * Pagesize is expected to be a power-of-two + */ +#define PAGE_SIZE_ALIGN(address, pagesize) \ + ((address) & ~((pagesize) - 1)) +/* + * Creates an IOMMU PTE for an address and a given pagesize + * The PTE has no permission bits set + * Pagesize is expected to be a power-of-two larger than 4096 + */ +#define PAGE_SIZE_PTE(address, pagesize) \ + (((address) | ((pagesize) - 1)) & \ + (~(pagesize >> 1)) & PM_ADDR_MASK) + +/* + * Takes a PTE value with mode=0x07 and returns the page size it maps + */ +#define PTE_PAGE_SIZE(pte) \ + (1ULL << (1 + ffz(((pte) | 0xfffULL)))) + +/* + * Takes a page-table level and returns the default page-size for this level + */ +#define PTE_LEVEL_PAGE_SIZE(level) \ + (1ULL << (12 + (9 * (level)))) + +#define IOMMU_PTE_P (1ULL << 0) +#define IOMMU_PTE_TV (1ULL << 1) +#define IOMMU_PTE_U (1ULL << 59) +#define IOMMU_PTE_FC (1ULL << 60) +#define IOMMU_PTE_IR (1ULL << 61) +#define IOMMU_PTE_IW (1ULL << 62) + +#define DTE_FLAG_IOTLB (0x01UL << 32) +#define DTE_FLAG_GV (0x01ULL << 55) +#define DTE_GLX_SHIFT (56) +#define DTE_GLX_MASK (3) + +#define DTE_GCR3_VAL_A(x) (((x) >> 12) & 0x00007ULL) +#define DTE_GCR3_VAL_B(x) (((x) >> 15) & 0x0ffffULL) +#define DTE_GCR3_VAL_C(x) (((x) >> 31) & 0xfffffULL) + +#define DTE_GCR3_INDEX_A 0 +#define DTE_GCR3_INDEX_B 1 +#define DTE_GCR3_INDEX_C 1 + +#define DTE_GCR3_SHIFT_A 58 +#define DTE_GCR3_SHIFT_B 16 +#define DTE_GCR3_SHIFT_C 43 + +#define GCR3_VALID 0x01ULL + +#define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL) +#define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P) +#define IOMMU_PTE_PAGE(pte) (phys_to_virt((pte) & IOMMU_PAGE_MASK)) +#define IOMMU_PTE_MODE(pte) (((pte) >> 9) & 0x07) + +#define IOMMU_PROT_MASK 0x03 +#define IOMMU_PROT_IR 0x01 +#define IOMMU_PROT_IW 0x02 + +/* IOMMU capabilities */ +#define IOMMU_CAP_IOTLB 24 +#define IOMMU_CAP_NPCACHE 26 +#define IOMMU_CAP_EFR 27 + +#define MAX_DOMAIN_ID 65536 + +/* Protection domain flags */ +#define PD_DMA_OPS_MASK (1UL << 0) /* domain used for dma_ops */ +#define PD_DEFAULT_MASK (1UL << 1) /* domain is a default dma_ops + domain for an IOMMU */ +#define PD_PASSTHROUGH_MASK (1UL << 2) /* domain has no page + translation */ +#define PD_IOMMUV2_MASK (1UL << 3) /* domain has gcr3 table */ + +extern bool amd_iommu_dump; +#define DUMP_printk(format, arg...) \ + do { \ + if (amd_iommu_dump) \ + printk(KERN_INFO "AMD-Vi: " format, ## arg); \ + } while(0); + +/* global flag if IOMMUs cache non-present entries */ +extern bool amd_iommu_np_cache; +/* Only true if all IOMMUs support device IOTLBs */ +extern bool amd_iommu_iotlb_sup; + +#define MAX_IRQS_PER_TABLE 256 +#define IRQ_TABLE_ALIGNMENT 128 + +struct irq_remap_table { + spinlock_t lock; + unsigned min_index; + u32 *table; +}; + +extern struct irq_remap_table **irq_lookup_table; + +/* Interrupt remapping feature used? */ +extern bool amd_iommu_irq_remap; + +/* kmem_cache to get tables with 128 byte alignement */ +extern struct kmem_cache *amd_iommu_irq_cache; + +/* + * Make iterating over all IOMMUs easier + */ +#define for_each_iommu(iommu) \ + list_for_each_entry((iommu), &amd_iommu_list, list) +#define for_each_iommu_safe(iommu, next) \ + list_for_each_entry_safe((iommu), (next), &amd_iommu_list, list) + +#define APERTURE_RANGE_SHIFT 27 /* 128 MB */ +#define APERTURE_RANGE_SIZE (1ULL << APERTURE_RANGE_SHIFT) +#define APERTURE_RANGE_PAGES (APERTURE_RANGE_SIZE >> PAGE_SHIFT) +#define APERTURE_MAX_RANGES 32 /* allows 4GB of DMA address space */ +#define APERTURE_RANGE_INDEX(a) ((a) >> APERTURE_RANGE_SHIFT) +#define APERTURE_PAGE_INDEX(a) (((a) >> 21) & 0x3fULL) + + +/* + * This struct is used to pass information about + * incoming PPR faults around. + */ +struct amd_iommu_fault { + u64 address; /* IO virtual address of the fault*/ + u32 pasid; /* Address space identifier */ + u16 device_id; /* Originating PCI device id */ + u16 tag; /* PPR tag */ + u16 flags; /* Fault flags */ + +}; + + +struct iommu_domain; + +/* + * This structure contains generic data for IOMMU protection domains + * independent of their use. + */ +struct protection_domain { + struct list_head list; /* for list of all protection domains */ + struct list_head dev_list; /* List of all devices in this domain */ + struct iommu_domain domain; /* generic domain handle used by + iommu core code */ + spinlock_t lock; /* mostly used to lock the page table*/ + struct mutex api_lock; /* protect page tables in the iommu-api path */ + u16 id; /* the domain id written to the device table */ + int mode; /* paging mode (0-6 levels) */ + u64 *pt_root; /* page table root pointer */ + int glx; /* Number of levels for GCR3 table */ + u64 *gcr3_tbl; /* Guest CR3 table */ + unsigned long flags; /* flags to find out type of domain */ + bool updated; /* complete domain flush required */ + unsigned dev_cnt; /* devices assigned to this domain */ + unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */ + void *priv; /* private data */ +}; + +/* + * For dynamic growth the aperture size is split into ranges of 128MB of + * DMA address space each. This struct represents one such range. + */ +struct aperture_range { + + /* address allocation bitmap */ + unsigned long *bitmap; + + /* + * Array of PTE pages for the aperture. In this array we save all the + * leaf pages of the domain page table used for the aperture. This way + * we don't need to walk the page table to find a specific PTE. We can + * just calculate its address in constant time. + */ + u64 *pte_pages[64]; + + unsigned long offset; +}; + +/* + * Data container for a dma_ops specific protection domain + */ +struct dma_ops_domain { + struct list_head list; + + /* generic protection domain information */ + struct protection_domain domain; + + /* size of the aperture for the mappings */ + unsigned long aperture_size; + + /* address we start to search for free addresses */ + unsigned long next_address; + + /* address space relevant data */ + struct aperture_range *aperture[APERTURE_MAX_RANGES]; + + /* This will be set to true when TLB needs to be flushed */ + bool need_flush; + + /* + * if this is a preallocated domain, keep the device for which it was + * preallocated in this variable + */ + u16 target_dev; +}; + +/* + * Structure where we save information about one hardware AMD IOMMU in the + * system. + */ +struct amd_iommu { + struct list_head list; + + /* Index within the IOMMU array */ + int index; + + /* locks the accesses to the hardware */ + spinlock_t lock; + + /* Pointer to PCI device of this IOMMU */ + struct pci_dev *dev; + + /* Cache pdev to root device for resume quirks */ + struct pci_dev *root_pdev; + + /* physical address of MMIO space */ + u64 mmio_phys; + + /* physical end address of MMIO space */ + u64 mmio_phys_end; + + /* virtual address of MMIO space */ + u8 __iomem *mmio_base; + + /* capabilities of that IOMMU read from ACPI */ + u32 cap; + + /* flags read from acpi table */ + u8 acpi_flags; + + /* Extended features */ + u64 features; + + /* IOMMUv2 */ + bool is_iommu_v2; + + /* PCI device id of the IOMMU device */ + u16 devid; + + /* + * Capability pointer. There could be more than one IOMMU per PCI + * device function if there are more than one AMD IOMMU capability + * pointers. + */ + u16 cap_ptr; + + /* pci domain of this IOMMU */ + u16 pci_seg; + + /* first device this IOMMU handles. read from PCI */ + u16 first_device; + /* last device this IOMMU handles. read from PCI */ + u16 last_device; + + /* start of exclusion range of that IOMMU */ + u64 exclusion_start; + /* length of exclusion range of that IOMMU */ + u64 exclusion_length; + + /* command buffer virtual address */ + u8 *cmd_buf; + /* size of command buffer */ + u32 cmd_buf_size; + + /* size of event buffer */ + u32 evt_buf_size; + /* event buffer virtual address */ + u8 *evt_buf; + + /* Base of the PPR log, if present */ + u8 *ppr_log; + + /* true if interrupts for this IOMMU are already enabled */ + bool int_enabled; + + /* if one, we need to send a completion wait command */ + bool need_sync; + + /* default dma_ops domain for that IOMMU */ + struct dma_ops_domain *default_dom; + + /* IOMMU sysfs device */ + struct device *iommu_dev; + + /* + * We can't rely on the BIOS to restore all values on reinit, so we + * need to stash them + */ + + /* The iommu BAR */ + u32 stored_addr_lo; + u32 stored_addr_hi; + + /* + * Each iommu has 6 l1s, each of which is documented as having 0x12 + * registers + */ + u32 stored_l1[6][0x12]; + + /* The l2 indirect registers */ + u32 stored_l2[0x83]; + + /* The maximum PC banks and counters/bank (PCSup=1) */ + u8 max_banks; + u8 max_counters; +}; + +struct devid_map { + struct list_head list; + u8 id; + u16 devid; + bool cmd_line; +}; + +/* Map HPET and IOAPIC ids to the devid used by the IOMMU */ +extern struct list_head ioapic_map; +extern struct list_head hpet_map; + +/* + * List with all IOMMUs in the system. This list is not locked because it is + * only written and read at driver initialization or suspend time + */ +extern struct list_head amd_iommu_list; + +/* + * Array with pointers to each IOMMU struct + * The indices are referenced in the protection domains + */ +extern struct amd_iommu *amd_iommus[MAX_IOMMUS]; + +/* Number of IOMMUs present in the system */ +extern int amd_iommus_present; + +/* + * Declarations for the global list of all protection domains + */ +extern spinlock_t amd_iommu_pd_lock; +extern struct list_head amd_iommu_pd_list; + +/* + * Structure defining one entry in the device table + */ +struct dev_table_entry { + u64 data[4]; +}; + +/* + * One entry for unity mappings parsed out of the ACPI table. + */ +struct unity_map_entry { + struct list_head list; + + /* starting device id this entry is used for (including) */ + u16 devid_start; + /* end device id this entry is used for (including) */ + u16 devid_end; + + /* start address to unity map (including) */ + u64 address_start; + /* end address to unity map (including) */ + u64 address_end; + + /* required protection */ + int prot; +}; + +/* + * List of all unity mappings. It is not locked because as runtime it is only + * read. It is created at ACPI table parsing time. + */ +extern struct list_head amd_iommu_unity_map; + +/* + * Data structures for device handling + */ + +/* + * Device table used by hardware. Read and write accesses by software are + * locked with the amd_iommu_pd_table lock. + */ +extern struct dev_table_entry *amd_iommu_dev_table; + +/* + * Alias table to find requestor ids to device ids. Not locked because only + * read on runtime. + */ +extern u16 *amd_iommu_alias_table; + +/* + * Reverse lookup table to find the IOMMU which translates a specific device. + */ +extern struct amd_iommu **amd_iommu_rlookup_table; + +/* size of the dma_ops aperture as power of 2 */ +extern unsigned amd_iommu_aperture_order; + +/* largest PCI device id we expect translation requests for */ +extern u16 amd_iommu_last_bdf; + +/* allocation bitmap for domain ids */ +extern unsigned long *amd_iommu_pd_alloc_bitmap; + +/* + * If true, the addresses will be flushed on unmap time, not when + * they are reused + */ +extern u32 amd_iommu_unmap_flush; + +/* Smallest max PASID supported by any IOMMU in the system */ +extern u32 amd_iommu_max_pasid; + +extern bool amd_iommu_v2_present; + +extern bool amd_iommu_force_isolation; + +/* Max levels of glxval supported */ +extern int amd_iommu_max_glx_val; + +/* + * This function flushes all internal caches of + * the IOMMU used by this driver. + */ +extern void iommu_flush_all_caches(struct amd_iommu *iommu); + +static inline int get_ioapic_devid(int id) +{ + struct devid_map *entry; + + list_for_each_entry(entry, &ioapic_map, list) { + if (entry->id == id) + return entry->devid; + } + + return -EINVAL; +} + +static inline int get_hpet_devid(int id) +{ + struct devid_map *entry; + + list_for_each_entry(entry, &hpet_map, list) { + if (entry->id == id) + return entry->devid; + } + + return -EINVAL; +} + +#ifdef CONFIG_AMD_IOMMU_STATS + +struct __iommu_counter { + char *name; + struct dentry *dent; + u64 value; +}; + +#define DECLARE_STATS_COUNTER(nm) \ + static struct __iommu_counter nm = { \ + .name = #nm, \ + } + +#define INC_STATS_COUNTER(name) name.value += 1 +#define ADD_STATS_COUNTER(name, x) name.value += (x) +#define SUB_STATS_COUNTER(name, x) name.value -= (x) + +#else /* CONFIG_AMD_IOMMU_STATS */ + +#define DECLARE_STATS_COUNTER(name) +#define INC_STATS_COUNTER(name) +#define ADD_STATS_COUNTER(name, x) +#define SUB_STATS_COUNTER(name, x) + +#endif /* CONFIG_AMD_IOMMU_STATS */ + +#endif /* _ASM_X86_AMD_IOMMU_TYPES_H */ diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c new file mode 100644 index 000000000..3465faf18 --- /dev/null +++ b/drivers/iommu/amd_iommu_v2.c @@ -0,0 +1,970 @@ +/* + * Copyright (C) 2010-2012 Advanced Micro Devices, Inc. + * Author: Joerg Roedel <jroedel@suse.de> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/mmu_notifier.h> +#include <linux/amd-iommu.h> +#include <linux/mm_types.h> +#include <linux/profile.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/iommu.h> +#include <linux/wait.h> +#include <linux/pci.h> +#include <linux/gfp.h> + +#include "amd_iommu_types.h" +#include "amd_iommu_proto.h" + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Joerg Roedel <jroedel@suse.de>"); + +#define MAX_DEVICES 0x10000 +#define PRI_QUEUE_SIZE 512 + +struct pri_queue { + atomic_t inflight; + bool finish; + int status; +}; + +struct pasid_state { + struct list_head list; /* For global state-list */ + atomic_t count; /* Reference count */ + unsigned mmu_notifier_count; /* Counting nested mmu_notifier + calls */ + struct mm_struct *mm; /* mm_struct for the faults */ + struct mmu_notifier mn; /* mmu_notifier handle */ + struct pri_queue pri[PRI_QUEUE_SIZE]; /* PRI tag states */ + struct device_state *device_state; /* Link to our device_state */ + int pasid; /* PASID index */ + bool invalid; /* Used during setup and + teardown of the pasid */ + spinlock_t lock; /* Protect pri_queues and + mmu_notifer_count */ + wait_queue_head_t wq; /* To wait for count == 0 */ +}; + +struct device_state { + struct list_head list; + u16 devid; + atomic_t count; + struct pci_dev *pdev; + struct pasid_state **states; + struct iommu_domain *domain; + int pasid_levels; + int max_pasids; + amd_iommu_invalid_ppr_cb inv_ppr_cb; + amd_iommu_invalidate_ctx inv_ctx_cb; + spinlock_t lock; + wait_queue_head_t wq; +}; + +struct fault { + struct work_struct work; + struct device_state *dev_state; + struct pasid_state *state; + struct mm_struct *mm; + u64 address; + u16 devid; + u16 pasid; + u16 tag; + u16 finish; + u16 flags; +}; + +static LIST_HEAD(state_list); +static spinlock_t state_lock; + +static struct workqueue_struct *iommu_wq; + +static void free_pasid_states(struct device_state *dev_state); + +static u16 device_id(struct pci_dev *pdev) +{ + u16 devid; + + devid = pdev->bus->number; + devid = (devid << 8) | pdev->devfn; + + return devid; +} + +static struct device_state *__get_device_state(u16 devid) +{ + struct device_state *dev_state; + + list_for_each_entry(dev_state, &state_list, list) { + if (dev_state->devid == devid) + return dev_state; + } + + return NULL; +} + +static struct device_state *get_device_state(u16 devid) +{ + struct device_state *dev_state; + unsigned long flags; + + spin_lock_irqsave(&state_lock, flags); + dev_state = __get_device_state(devid); + if (dev_state != NULL) + atomic_inc(&dev_state->count); + spin_unlock_irqrestore(&state_lock, flags); + + return dev_state; +} + +static void free_device_state(struct device_state *dev_state) +{ + /* + * First detach device from domain - No more PRI requests will arrive + * from that device after it is unbound from the IOMMUv2 domain. + */ + iommu_detach_device(dev_state->domain, &dev_state->pdev->dev); + + /* Everything is down now, free the IOMMUv2 domain */ + iommu_domain_free(dev_state->domain); + + /* Finally get rid of the device-state */ + kfree(dev_state); +} + +static void put_device_state(struct device_state *dev_state) +{ + if (atomic_dec_and_test(&dev_state->count)) + wake_up(&dev_state->wq); +} + +/* Must be called under dev_state->lock */ +static struct pasid_state **__get_pasid_state_ptr(struct device_state *dev_state, + int pasid, bool alloc) +{ + struct pasid_state **root, **ptr; + int level, index; + + level = dev_state->pasid_levels; + root = dev_state->states; + + while (true) { + + index = (pasid >> (9 * level)) & 0x1ff; + ptr = &root[index]; + + if (level == 0) + break; + + if (*ptr == NULL) { + if (!alloc) + return NULL; + + *ptr = (void *)get_zeroed_page(GFP_ATOMIC); + if (*ptr == NULL) + return NULL; + } + + root = (struct pasid_state **)*ptr; + level -= 1; + } + + return ptr; +} + +static int set_pasid_state(struct device_state *dev_state, + struct pasid_state *pasid_state, + int pasid) +{ + struct pasid_state **ptr; + unsigned long flags; + int ret; + + spin_lock_irqsave(&dev_state->lock, flags); + ptr = __get_pasid_state_ptr(dev_state, pasid, true); + + ret = -ENOMEM; + if (ptr == NULL) + goto out_unlock; + + ret = -ENOMEM; + if (*ptr != NULL) + goto out_unlock; + + *ptr = pasid_state; + + ret = 0; + +out_unlock: + spin_unlock_irqrestore(&dev_state->lock, flags); + + return ret; +} + +static void clear_pasid_state(struct device_state *dev_state, int pasid) +{ + struct pasid_state **ptr; + unsigned long flags; + + spin_lock_irqsave(&dev_state->lock, flags); + ptr = __get_pasid_state_ptr(dev_state, pasid, true); + + if (ptr == NULL) + goto out_unlock; + + *ptr = NULL; + +out_unlock: + spin_unlock_irqrestore(&dev_state->lock, flags); +} + +static struct pasid_state *get_pasid_state(struct device_state *dev_state, + int pasid) +{ + struct pasid_state **ptr, *ret = NULL; + unsigned long flags; + + spin_lock_irqsave(&dev_state->lock, flags); + ptr = __get_pasid_state_ptr(dev_state, pasid, false); + + if (ptr == NULL) + goto out_unlock; + + ret = *ptr; + if (ret) + atomic_inc(&ret->count); + +out_unlock: + spin_unlock_irqrestore(&dev_state->lock, flags); + + return ret; +} + +static void free_pasid_state(struct pasid_state *pasid_state) +{ + kfree(pasid_state); +} + +static void put_pasid_state(struct pasid_state *pasid_state) +{ + if (atomic_dec_and_test(&pasid_state->count)) + wake_up(&pasid_state->wq); +} + +static void put_pasid_state_wait(struct pasid_state *pasid_state) +{ + atomic_dec(&pasid_state->count); + wait_event(pasid_state->wq, !atomic_read(&pasid_state->count)); + free_pasid_state(pasid_state); +} + +static void unbind_pasid(struct pasid_state *pasid_state) +{ + struct iommu_domain *domain; + + domain = pasid_state->device_state->domain; + + /* + * Mark pasid_state as invalid, no more faults will we added to the + * work queue after this is visible everywhere. + */ + pasid_state->invalid = true; + + /* Make sure this is visible */ + smp_wmb(); + + /* After this the device/pasid can't access the mm anymore */ + amd_iommu_domain_clear_gcr3(domain, pasid_state->pasid); + + /* Make sure no more pending faults are in the queue */ + flush_workqueue(iommu_wq); +} + +static void free_pasid_states_level1(struct pasid_state **tbl) +{ + int i; + + for (i = 0; i < 512; ++i) { + if (tbl[i] == NULL) + continue; + + free_page((unsigned long)tbl[i]); + } +} + +static void free_pasid_states_level2(struct pasid_state **tbl) +{ + struct pasid_state **ptr; + int i; + + for (i = 0; i < 512; ++i) { + if (tbl[i] == NULL) + continue; + + ptr = (struct pasid_state **)tbl[i]; + free_pasid_states_level1(ptr); + } +} + +static void free_pasid_states(struct device_state *dev_state) +{ + struct pasid_state *pasid_state; + int i; + + for (i = 0; i < dev_state->max_pasids; ++i) { + pasid_state = get_pasid_state(dev_state, i); + if (pasid_state == NULL) + continue; + + put_pasid_state(pasid_state); + + /* + * This will call the mn_release function and + * unbind the PASID + */ + mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm); + + put_pasid_state_wait(pasid_state); /* Reference taken in + amd_iommu_bind_pasid */ + + /* Drop reference taken in amd_iommu_bind_pasid */ + put_device_state(dev_state); + } + + if (dev_state->pasid_levels == 2) + free_pasid_states_level2(dev_state->states); + else if (dev_state->pasid_levels == 1) + free_pasid_states_level1(dev_state->states); + else if (dev_state->pasid_levels != 0) + BUG(); + + free_page((unsigned long)dev_state->states); +} + +static struct pasid_state *mn_to_state(struct mmu_notifier *mn) +{ + return container_of(mn, struct pasid_state, mn); +} + +static void __mn_flush_page(struct mmu_notifier *mn, + unsigned long address) +{ + struct pasid_state *pasid_state; + struct device_state *dev_state; + + pasid_state = mn_to_state(mn); + dev_state = pasid_state->device_state; + + amd_iommu_flush_page(dev_state->domain, pasid_state->pasid, address); +} + +static int mn_clear_flush_young(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, + unsigned long end) +{ + for (; start < end; start += PAGE_SIZE) + __mn_flush_page(mn, start); + + return 0; +} + +static void mn_invalidate_page(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long address) +{ + __mn_flush_page(mn, address); +} + +static void mn_invalidate_range(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + struct pasid_state *pasid_state; + struct device_state *dev_state; + + pasid_state = mn_to_state(mn); + dev_state = pasid_state->device_state; + + if ((start ^ (end - 1)) < PAGE_SIZE) + amd_iommu_flush_page(dev_state->domain, pasid_state->pasid, + start); + else + amd_iommu_flush_tlb(dev_state->domain, pasid_state->pasid); +} + +static void mn_release(struct mmu_notifier *mn, struct mm_struct *mm) +{ + struct pasid_state *pasid_state; + struct device_state *dev_state; + bool run_inv_ctx_cb; + + might_sleep(); + + pasid_state = mn_to_state(mn); + dev_state = pasid_state->device_state; + run_inv_ctx_cb = !pasid_state->invalid; + + if (run_inv_ctx_cb && dev_state->inv_ctx_cb) + dev_state->inv_ctx_cb(dev_state->pdev, pasid_state->pasid); + + unbind_pasid(pasid_state); +} + +static struct mmu_notifier_ops iommu_mn = { + .release = mn_release, + .clear_flush_young = mn_clear_flush_young, + .invalidate_page = mn_invalidate_page, + .invalidate_range = mn_invalidate_range, +}; + +static void set_pri_tag_status(struct pasid_state *pasid_state, + u16 tag, int status) +{ + unsigned long flags; + + spin_lock_irqsave(&pasid_state->lock, flags); + pasid_state->pri[tag].status = status; + spin_unlock_irqrestore(&pasid_state->lock, flags); +} + +static void finish_pri_tag(struct device_state *dev_state, + struct pasid_state *pasid_state, + u16 tag) +{ + unsigned long flags; + + spin_lock_irqsave(&pasid_state->lock, flags); + if (atomic_dec_and_test(&pasid_state->pri[tag].inflight) && + pasid_state->pri[tag].finish) { + amd_iommu_complete_ppr(dev_state->pdev, pasid_state->pasid, + pasid_state->pri[tag].status, tag); + pasid_state->pri[tag].finish = false; + pasid_state->pri[tag].status = PPR_SUCCESS; + } + spin_unlock_irqrestore(&pasid_state->lock, flags); +} + +static void handle_fault_error(struct fault *fault) +{ + int status; + + if (!fault->dev_state->inv_ppr_cb) { + set_pri_tag_status(fault->state, fault->tag, PPR_INVALID); + return; + } + + status = fault->dev_state->inv_ppr_cb(fault->dev_state->pdev, + fault->pasid, + fault->address, + fault->flags); + switch (status) { + case AMD_IOMMU_INV_PRI_RSP_SUCCESS: + set_pri_tag_status(fault->state, fault->tag, PPR_SUCCESS); + break; + case AMD_IOMMU_INV_PRI_RSP_INVALID: + set_pri_tag_status(fault->state, fault->tag, PPR_INVALID); + break; + case AMD_IOMMU_INV_PRI_RSP_FAIL: + set_pri_tag_status(fault->state, fault->tag, PPR_FAILURE); + break; + default: + BUG(); + } +} + +static void do_fault(struct work_struct *work) +{ + struct fault *fault = container_of(work, struct fault, work); + struct mm_struct *mm; + struct vm_area_struct *vma; + u64 address; + int ret, write; + + write = !!(fault->flags & PPR_FAULT_WRITE); + + mm = fault->state->mm; + address = fault->address; + + down_read(&mm->mmap_sem); + vma = find_extend_vma(mm, address); + if (!vma || address < vma->vm_start) { + /* failed to get a vma in the right range */ + up_read(&mm->mmap_sem); + handle_fault_error(fault); + goto out; + } + + ret = handle_mm_fault(mm, vma, address, write); + if (ret & VM_FAULT_ERROR) { + /* failed to service fault */ + up_read(&mm->mmap_sem); + handle_fault_error(fault); + goto out; + } + + up_read(&mm->mmap_sem); + +out: + finish_pri_tag(fault->dev_state, fault->state, fault->tag); + + put_pasid_state(fault->state); + + kfree(fault); +} + +static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data) +{ + struct amd_iommu_fault *iommu_fault; + struct pasid_state *pasid_state; + struct device_state *dev_state; + unsigned long flags; + struct fault *fault; + bool finish; + u16 tag; + int ret; + + iommu_fault = data; + tag = iommu_fault->tag & 0x1ff; + finish = (iommu_fault->tag >> 9) & 1; + + ret = NOTIFY_DONE; + dev_state = get_device_state(iommu_fault->device_id); + if (dev_state == NULL) + goto out; + + pasid_state = get_pasid_state(dev_state, iommu_fault->pasid); + if (pasid_state == NULL || pasid_state->invalid) { + /* We know the device but not the PASID -> send INVALID */ + amd_iommu_complete_ppr(dev_state->pdev, iommu_fault->pasid, + PPR_INVALID, tag); + goto out_drop_state; + } + + spin_lock_irqsave(&pasid_state->lock, flags); + atomic_inc(&pasid_state->pri[tag].inflight); + if (finish) + pasid_state->pri[tag].finish = true; + spin_unlock_irqrestore(&pasid_state->lock, flags); + + fault = kzalloc(sizeof(*fault), GFP_ATOMIC); + if (fault == NULL) { + /* We are OOM - send success and let the device re-fault */ + finish_pri_tag(dev_state, pasid_state, tag); + goto out_drop_state; + } + + fault->dev_state = dev_state; + fault->address = iommu_fault->address; + fault->state = pasid_state; + fault->tag = tag; + fault->finish = finish; + fault->pasid = iommu_fault->pasid; + fault->flags = iommu_fault->flags; + INIT_WORK(&fault->work, do_fault); + + queue_work(iommu_wq, &fault->work); + + ret = NOTIFY_OK; + +out_drop_state: + + if (ret != NOTIFY_OK && pasid_state) + put_pasid_state(pasid_state); + + put_device_state(dev_state); + +out: + return ret; +} + +static struct notifier_block ppr_nb = { + .notifier_call = ppr_notifier, +}; + +int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid, + struct task_struct *task) +{ + struct pasid_state *pasid_state; + struct device_state *dev_state; + struct mm_struct *mm; + u16 devid; + int ret; + + might_sleep(); + + if (!amd_iommu_v2_supported()) + return -ENODEV; + + devid = device_id(pdev); + dev_state = get_device_state(devid); + + if (dev_state == NULL) + return -EINVAL; + + ret = -EINVAL; + if (pasid < 0 || pasid >= dev_state->max_pasids) + goto out; + + ret = -ENOMEM; + pasid_state = kzalloc(sizeof(*pasid_state), GFP_KERNEL); + if (pasid_state == NULL) + goto out; + + + atomic_set(&pasid_state->count, 1); + init_waitqueue_head(&pasid_state->wq); + spin_lock_init(&pasid_state->lock); + + mm = get_task_mm(task); + pasid_state->mm = mm; + pasid_state->device_state = dev_state; + pasid_state->pasid = pasid; + pasid_state->invalid = true; /* Mark as valid only if we are + done with setting up the pasid */ + pasid_state->mn.ops = &iommu_mn; + + if (pasid_state->mm == NULL) + goto out_free; + + mmu_notifier_register(&pasid_state->mn, mm); + + ret = set_pasid_state(dev_state, pasid_state, pasid); + if (ret) + goto out_unregister; + + ret = amd_iommu_domain_set_gcr3(dev_state->domain, pasid, + __pa(pasid_state->mm->pgd)); + if (ret) + goto out_clear_state; + + /* Now we are ready to handle faults */ + pasid_state->invalid = false; + + /* + * Drop the reference to the mm_struct here. We rely on the + * mmu_notifier release call-back to inform us when the mm + * is going away. + */ + mmput(mm); + + return 0; + +out_clear_state: + clear_pasid_state(dev_state, pasid); + +out_unregister: + mmu_notifier_unregister(&pasid_state->mn, mm); + +out_free: + mmput(mm); + free_pasid_state(pasid_state); + +out: + put_device_state(dev_state); + + return ret; +} +EXPORT_SYMBOL(amd_iommu_bind_pasid); + +void amd_iommu_unbind_pasid(struct pci_dev *pdev, int pasid) +{ + struct pasid_state *pasid_state; + struct device_state *dev_state; + u16 devid; + + might_sleep(); + + if (!amd_iommu_v2_supported()) + return; + + devid = device_id(pdev); + dev_state = get_device_state(devid); + if (dev_state == NULL) + return; + + if (pasid < 0 || pasid >= dev_state->max_pasids) + goto out; + + pasid_state = get_pasid_state(dev_state, pasid); + if (pasid_state == NULL) + goto out; + /* + * Drop reference taken here. We are safe because we still hold + * the reference taken in the amd_iommu_bind_pasid function. + */ + put_pasid_state(pasid_state); + + /* Clear the pasid state so that the pasid can be re-used */ + clear_pasid_state(dev_state, pasid_state->pasid); + + /* + * Call mmu_notifier_unregister to drop our reference + * to pasid_state->mm + */ + mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm); + + put_pasid_state_wait(pasid_state); /* Reference taken in + amd_iommu_bind_pasid */ +out: + /* Drop reference taken in this function */ + put_device_state(dev_state); + + /* Drop reference taken in amd_iommu_bind_pasid */ + put_device_state(dev_state); +} +EXPORT_SYMBOL(amd_iommu_unbind_pasid); + +int amd_iommu_init_device(struct pci_dev *pdev, int pasids) +{ + struct device_state *dev_state; + unsigned long flags; + int ret, tmp; + u16 devid; + + might_sleep(); + + if (!amd_iommu_v2_supported()) + return -ENODEV; + + if (pasids <= 0 || pasids > (PASID_MASK + 1)) + return -EINVAL; + + devid = device_id(pdev); + + dev_state = kzalloc(sizeof(*dev_state), GFP_KERNEL); + if (dev_state == NULL) + return -ENOMEM; + + spin_lock_init(&dev_state->lock); + init_waitqueue_head(&dev_state->wq); + dev_state->pdev = pdev; + dev_state->devid = devid; + + tmp = pasids; + for (dev_state->pasid_levels = 0; (tmp - 1) & ~0x1ff; tmp >>= 9) + dev_state->pasid_levels += 1; + + atomic_set(&dev_state->count, 1); + dev_state->max_pasids = pasids; + + ret = -ENOMEM; + dev_state->states = (void *)get_zeroed_page(GFP_KERNEL); + if (dev_state->states == NULL) + goto out_free_dev_state; + + dev_state->domain = iommu_domain_alloc(&pci_bus_type); + if (dev_state->domain == NULL) + goto out_free_states; + + amd_iommu_domain_direct_map(dev_state->domain); + + ret = amd_iommu_domain_enable_v2(dev_state->domain, pasids); + if (ret) + goto out_free_domain; + + ret = iommu_attach_device(dev_state->domain, &pdev->dev); + if (ret != 0) + goto out_free_domain; + + spin_lock_irqsave(&state_lock, flags); + + if (__get_device_state(devid) != NULL) { + spin_unlock_irqrestore(&state_lock, flags); + ret = -EBUSY; + goto out_free_domain; + } + + list_add_tail(&dev_state->list, &state_list); + + spin_unlock_irqrestore(&state_lock, flags); + + return 0; + +out_free_domain: + iommu_domain_free(dev_state->domain); + +out_free_states: + free_page((unsigned long)dev_state->states); + +out_free_dev_state: + kfree(dev_state); + + return ret; +} +EXPORT_SYMBOL(amd_iommu_init_device); + +void amd_iommu_free_device(struct pci_dev *pdev) +{ + struct device_state *dev_state; + unsigned long flags; + u16 devid; + + if (!amd_iommu_v2_supported()) + return; + + devid = device_id(pdev); + + spin_lock_irqsave(&state_lock, flags); + + dev_state = __get_device_state(devid); + if (dev_state == NULL) { + spin_unlock_irqrestore(&state_lock, flags); + return; + } + + list_del(&dev_state->list); + + spin_unlock_irqrestore(&state_lock, flags); + + /* Get rid of any remaining pasid states */ + free_pasid_states(dev_state); + + put_device_state(dev_state); + /* + * Wait until the last reference is dropped before freeing + * the device state. + */ + wait_event(dev_state->wq, !atomic_read(&dev_state->count)); + free_device_state(dev_state); +} +EXPORT_SYMBOL(amd_iommu_free_device); + +int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev, + amd_iommu_invalid_ppr_cb cb) +{ + struct device_state *dev_state; + unsigned long flags; + u16 devid; + int ret; + + if (!amd_iommu_v2_supported()) + return -ENODEV; + + devid = device_id(pdev); + + spin_lock_irqsave(&state_lock, flags); + + ret = -EINVAL; + dev_state = __get_device_state(devid); + if (dev_state == NULL) + goto out_unlock; + + dev_state->inv_ppr_cb = cb; + + ret = 0; + +out_unlock: + spin_unlock_irqrestore(&state_lock, flags); + + return ret; +} +EXPORT_SYMBOL(amd_iommu_set_invalid_ppr_cb); + +int amd_iommu_set_invalidate_ctx_cb(struct pci_dev *pdev, + amd_iommu_invalidate_ctx cb) +{ + struct device_state *dev_state; + unsigned long flags; + u16 devid; + int ret; + + if (!amd_iommu_v2_supported()) + return -ENODEV; + + devid = device_id(pdev); + + spin_lock_irqsave(&state_lock, flags); + + ret = -EINVAL; + dev_state = __get_device_state(devid); + if (dev_state == NULL) + goto out_unlock; + + dev_state->inv_ctx_cb = cb; + + ret = 0; + +out_unlock: + spin_unlock_irqrestore(&state_lock, flags); + + return ret; +} +EXPORT_SYMBOL(amd_iommu_set_invalidate_ctx_cb); + +static int __init amd_iommu_v2_init(void) +{ + int ret; + + pr_info("AMD IOMMUv2 driver by Joerg Roedel <jroedel@suse.de>\n"); + + if (!amd_iommu_v2_supported()) { + pr_info("AMD IOMMUv2 functionality not available on this system\n"); + /* + * Load anyway to provide the symbols to other modules + * which may use AMD IOMMUv2 optionally. + */ + return 0; + } + + spin_lock_init(&state_lock); + + ret = -ENOMEM; + iommu_wq = create_workqueue("amd_iommu_v2"); + if (iommu_wq == NULL) + goto out; + + amd_iommu_register_ppr_notifier(&ppr_nb); + + return 0; + +out: + return ret; +} + +static void __exit amd_iommu_v2_exit(void) +{ + struct device_state *dev_state; + int i; + + if (!amd_iommu_v2_supported()) + return; + + amd_iommu_unregister_ppr_notifier(&ppr_nb); + + flush_workqueue(iommu_wq); + + /* + * The loop below might call flush_workqueue(), so call + * destroy_workqueue() after it + */ + for (i = 0; i < MAX_DEVICES; ++i) { + dev_state = get_device_state(i); + + if (dev_state == NULL) + continue; + + WARN_ON_ONCE(1); + + put_device_state(dev_state); + amd_iommu_free_device(dev_state->pdev); + } + + destroy_workqueue(iommu_wq); +} + +module_init(amd_iommu_v2_init); +module_exit(amd_iommu_v2_exit); diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c new file mode 100644 index 000000000..65075ef75 --- /dev/null +++ b/drivers/iommu/arm-smmu.c @@ -0,0 +1,1916 @@ +/* + * IOMMU API for ARM architected SMMU implementations. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) 2013 ARM Limited + * + * Author: Will Deacon <will.deacon@arm.com> + * + * This driver currently supports: + * - SMMUv1 and v2 implementations + * - Stream-matching and stream-indexing + * - v7/v8 long-descriptor format + * - Non-secure access to the SMMU + * - Context fault reporting + */ + +#define pr_fmt(fmt) "arm-smmu: " fmt + +#include <linux/delay.h> +#include <linux/dma-mapping.h> +#include <linux/err.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/iommu.h> +#include <linux/iopoll.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/pci.h> +#include <linux/platform_device.h> +#include <linux/slab.h> +#include <linux/spinlock.h> + +#include <linux/amba/bus.h> + +#include "io-pgtable.h" + +/* Maximum number of stream IDs assigned to a single device */ +#define MAX_MASTER_STREAMIDS MAX_PHANDLE_ARGS + +/* Maximum number of context banks per SMMU */ +#define ARM_SMMU_MAX_CBS 128 + +/* Maximum number of mapping groups per SMMU */ +#define ARM_SMMU_MAX_SMRS 128 + +/* SMMU global address space */ +#define ARM_SMMU_GR0(smmu) ((smmu)->base) +#define ARM_SMMU_GR1(smmu) ((smmu)->base + (1 << (smmu)->pgshift)) + +/* + * SMMU global address space with conditional offset to access secure + * aliases of non-secure registers (e.g. nsCR0: 0x400, nsGFSR: 0x448, + * nsGFSYNR0: 0x450) + */ +#define ARM_SMMU_GR0_NS(smmu) \ + ((smmu)->base + \ + ((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS) \ + ? 0x400 : 0)) + +/* Configuration registers */ +#define ARM_SMMU_GR0_sCR0 0x0 +#define sCR0_CLIENTPD (1 << 0) +#define sCR0_GFRE (1 << 1) +#define sCR0_GFIE (1 << 2) +#define sCR0_GCFGFRE (1 << 4) +#define sCR0_GCFGFIE (1 << 5) +#define sCR0_USFCFG (1 << 10) +#define sCR0_VMIDPNE (1 << 11) +#define sCR0_PTM (1 << 12) +#define sCR0_FB (1 << 13) +#define sCR0_BSU_SHIFT 14 +#define sCR0_BSU_MASK 0x3 + +/* Identification registers */ +#define ARM_SMMU_GR0_ID0 0x20 +#define ARM_SMMU_GR0_ID1 0x24 +#define ARM_SMMU_GR0_ID2 0x28 +#define ARM_SMMU_GR0_ID3 0x2c +#define ARM_SMMU_GR0_ID4 0x30 +#define ARM_SMMU_GR0_ID5 0x34 +#define ARM_SMMU_GR0_ID6 0x38 +#define ARM_SMMU_GR0_ID7 0x3c +#define ARM_SMMU_GR0_sGFSR 0x48 +#define ARM_SMMU_GR0_sGFSYNR0 0x50 +#define ARM_SMMU_GR0_sGFSYNR1 0x54 +#define ARM_SMMU_GR0_sGFSYNR2 0x58 + +#define ID0_S1TS (1 << 30) +#define ID0_S2TS (1 << 29) +#define ID0_NTS (1 << 28) +#define ID0_SMS (1 << 27) +#define ID0_ATOSNS (1 << 26) +#define ID0_CTTW (1 << 14) +#define ID0_NUMIRPT_SHIFT 16 +#define ID0_NUMIRPT_MASK 0xff +#define ID0_NUMSIDB_SHIFT 9 +#define ID0_NUMSIDB_MASK 0xf +#define ID0_NUMSMRG_SHIFT 0 +#define ID0_NUMSMRG_MASK 0xff + +#define ID1_PAGESIZE (1 << 31) +#define ID1_NUMPAGENDXB_SHIFT 28 +#define ID1_NUMPAGENDXB_MASK 7 +#define ID1_NUMS2CB_SHIFT 16 +#define ID1_NUMS2CB_MASK 0xff +#define ID1_NUMCB_SHIFT 0 +#define ID1_NUMCB_MASK 0xff + +#define ID2_OAS_SHIFT 4 +#define ID2_OAS_MASK 0xf +#define ID2_IAS_SHIFT 0 +#define ID2_IAS_MASK 0xf +#define ID2_UBS_SHIFT 8 +#define ID2_UBS_MASK 0xf +#define ID2_PTFS_4K (1 << 12) +#define ID2_PTFS_16K (1 << 13) +#define ID2_PTFS_64K (1 << 14) + +/* Global TLB invalidation */ +#define ARM_SMMU_GR0_TLBIVMID 0x64 +#define ARM_SMMU_GR0_TLBIALLNSNH 0x68 +#define ARM_SMMU_GR0_TLBIALLH 0x6c +#define ARM_SMMU_GR0_sTLBGSYNC 0x70 +#define ARM_SMMU_GR0_sTLBGSTATUS 0x74 +#define sTLBGSTATUS_GSACTIVE (1 << 0) +#define TLB_LOOP_TIMEOUT 1000000 /* 1s! */ + +/* Stream mapping registers */ +#define ARM_SMMU_GR0_SMR(n) (0x800 + ((n) << 2)) +#define SMR_VALID (1 << 31) +#define SMR_MASK_SHIFT 16 +#define SMR_MASK_MASK 0x7fff +#define SMR_ID_SHIFT 0 +#define SMR_ID_MASK 0x7fff + +#define ARM_SMMU_GR0_S2CR(n) (0xc00 + ((n) << 2)) +#define S2CR_CBNDX_SHIFT 0 +#define S2CR_CBNDX_MASK 0xff +#define S2CR_TYPE_SHIFT 16 +#define S2CR_TYPE_MASK 0x3 +#define S2CR_TYPE_TRANS (0 << S2CR_TYPE_SHIFT) +#define S2CR_TYPE_BYPASS (1 << S2CR_TYPE_SHIFT) +#define S2CR_TYPE_FAULT (2 << S2CR_TYPE_SHIFT) + +/* Context bank attribute registers */ +#define ARM_SMMU_GR1_CBAR(n) (0x0 + ((n) << 2)) +#define CBAR_VMID_SHIFT 0 +#define CBAR_VMID_MASK 0xff +#define CBAR_S1_BPSHCFG_SHIFT 8 +#define CBAR_S1_BPSHCFG_MASK 3 +#define CBAR_S1_BPSHCFG_NSH 3 +#define CBAR_S1_MEMATTR_SHIFT 12 +#define CBAR_S1_MEMATTR_MASK 0xf +#define CBAR_S1_MEMATTR_WB 0xf +#define CBAR_TYPE_SHIFT 16 +#define CBAR_TYPE_MASK 0x3 +#define CBAR_TYPE_S2_TRANS (0 << CBAR_TYPE_SHIFT) +#define CBAR_TYPE_S1_TRANS_S2_BYPASS (1 << CBAR_TYPE_SHIFT) +#define CBAR_TYPE_S1_TRANS_S2_FAULT (2 << CBAR_TYPE_SHIFT) +#define CBAR_TYPE_S1_TRANS_S2_TRANS (3 << CBAR_TYPE_SHIFT) +#define CBAR_IRPTNDX_SHIFT 24 +#define CBAR_IRPTNDX_MASK 0xff + +#define ARM_SMMU_GR1_CBA2R(n) (0x800 + ((n) << 2)) +#define CBA2R_RW64_32BIT (0 << 0) +#define CBA2R_RW64_64BIT (1 << 0) + +/* Translation context bank */ +#define ARM_SMMU_CB_BASE(smmu) ((smmu)->base + ((smmu)->size >> 1)) +#define ARM_SMMU_CB(smmu, n) ((n) * (1 << (smmu)->pgshift)) + +#define ARM_SMMU_CB_SCTLR 0x0 +#define ARM_SMMU_CB_RESUME 0x8 +#define ARM_SMMU_CB_TTBCR2 0x10 +#define ARM_SMMU_CB_TTBR0_LO 0x20 +#define ARM_SMMU_CB_TTBR0_HI 0x24 +#define ARM_SMMU_CB_TTBR1_LO 0x28 +#define ARM_SMMU_CB_TTBR1_HI 0x2c +#define ARM_SMMU_CB_TTBCR 0x30 +#define ARM_SMMU_CB_S1_MAIR0 0x38 +#define ARM_SMMU_CB_S1_MAIR1 0x3c +#define ARM_SMMU_CB_PAR_LO 0x50 +#define ARM_SMMU_CB_PAR_HI 0x54 +#define ARM_SMMU_CB_FSR 0x58 +#define ARM_SMMU_CB_FAR_LO 0x60 +#define ARM_SMMU_CB_FAR_HI 0x64 +#define ARM_SMMU_CB_FSYNR0 0x68 +#define ARM_SMMU_CB_S1_TLBIVA 0x600 +#define ARM_SMMU_CB_S1_TLBIASID 0x610 +#define ARM_SMMU_CB_S1_TLBIVAL 0x620 +#define ARM_SMMU_CB_S2_TLBIIPAS2 0x630 +#define ARM_SMMU_CB_S2_TLBIIPAS2L 0x638 +#define ARM_SMMU_CB_ATS1PR_LO 0x800 +#define ARM_SMMU_CB_ATS1PR_HI 0x804 +#define ARM_SMMU_CB_ATSR 0x8f0 + +#define SCTLR_S1_ASIDPNE (1 << 12) +#define SCTLR_CFCFG (1 << 7) +#define SCTLR_CFIE (1 << 6) +#define SCTLR_CFRE (1 << 5) +#define SCTLR_E (1 << 4) +#define SCTLR_AFE (1 << 2) +#define SCTLR_TRE (1 << 1) +#define SCTLR_M (1 << 0) +#define SCTLR_EAE_SBOP (SCTLR_AFE | SCTLR_TRE) + +#define CB_PAR_F (1 << 0) + +#define ATSR_ACTIVE (1 << 0) + +#define RESUME_RETRY (0 << 0) +#define RESUME_TERMINATE (1 << 0) + +#define TTBCR2_SEP_SHIFT 15 +#define TTBCR2_SEP_UPSTREAM (0x7 << TTBCR2_SEP_SHIFT) + +#define TTBRn_HI_ASID_SHIFT 16 + +#define FSR_MULTI (1 << 31) +#define FSR_SS (1 << 30) +#define FSR_UUT (1 << 8) +#define FSR_ASF (1 << 7) +#define FSR_TLBLKF (1 << 6) +#define FSR_TLBMCF (1 << 5) +#define FSR_EF (1 << 4) +#define FSR_PF (1 << 3) +#define FSR_AFF (1 << 2) +#define FSR_TF (1 << 1) + +#define FSR_IGN (FSR_AFF | FSR_ASF | \ + FSR_TLBMCF | FSR_TLBLKF) +#define FSR_FAULT (FSR_MULTI | FSR_SS | FSR_UUT | \ + FSR_EF | FSR_PF | FSR_TF | FSR_IGN) + +#define FSYNR0_WNR (1 << 4) + +static int force_stage; +module_param_named(force_stage, force_stage, int, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(force_stage, + "Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation."); + +enum arm_smmu_arch_version { + ARM_SMMU_V1 = 1, + ARM_SMMU_V2, +}; + +struct arm_smmu_smr { + u8 idx; + u16 mask; + u16 id; +}; + +struct arm_smmu_master_cfg { + int num_streamids; + u16 streamids[MAX_MASTER_STREAMIDS]; + struct arm_smmu_smr *smrs; +}; + +struct arm_smmu_master { + struct device_node *of_node; + struct rb_node node; + struct arm_smmu_master_cfg cfg; +}; + +struct arm_smmu_device { + struct device *dev; + + void __iomem *base; + unsigned long size; + unsigned long pgshift; + +#define ARM_SMMU_FEAT_COHERENT_WALK (1 << 0) +#define ARM_SMMU_FEAT_STREAM_MATCH (1 << 1) +#define ARM_SMMU_FEAT_TRANS_S1 (1 << 2) +#define ARM_SMMU_FEAT_TRANS_S2 (1 << 3) +#define ARM_SMMU_FEAT_TRANS_NESTED (1 << 4) +#define ARM_SMMU_FEAT_TRANS_OPS (1 << 5) + u32 features; + +#define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0) + u32 options; + enum arm_smmu_arch_version version; + + u32 num_context_banks; + u32 num_s2_context_banks; + DECLARE_BITMAP(context_map, ARM_SMMU_MAX_CBS); + atomic_t irptndx; + + u32 num_mapping_groups; + DECLARE_BITMAP(smr_map, ARM_SMMU_MAX_SMRS); + + unsigned long va_size; + unsigned long ipa_size; + unsigned long pa_size; + + u32 num_global_irqs; + u32 num_context_irqs; + unsigned int *irqs; + + struct list_head list; + struct rb_root masters; +}; + +struct arm_smmu_cfg { + u8 cbndx; + u8 irptndx; + u32 cbar; +}; +#define INVALID_IRPTNDX 0xff + +#define ARM_SMMU_CB_ASID(cfg) ((cfg)->cbndx) +#define ARM_SMMU_CB_VMID(cfg) ((cfg)->cbndx + 1) + +enum arm_smmu_domain_stage { + ARM_SMMU_DOMAIN_S1 = 0, + ARM_SMMU_DOMAIN_S2, + ARM_SMMU_DOMAIN_NESTED, +}; + +struct arm_smmu_domain { + struct arm_smmu_device *smmu; + struct io_pgtable_ops *pgtbl_ops; + spinlock_t pgtbl_lock; + struct arm_smmu_cfg cfg; + enum arm_smmu_domain_stage stage; + struct mutex init_mutex; /* Protects smmu pointer */ + struct iommu_domain domain; +}; + +static struct iommu_ops arm_smmu_ops; + +static DEFINE_SPINLOCK(arm_smmu_devices_lock); +static LIST_HEAD(arm_smmu_devices); + +struct arm_smmu_option_prop { + u32 opt; + const char *prop; +}; + +static struct arm_smmu_option_prop arm_smmu_options[] = { + { ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" }, + { 0, NULL}, +}; + +static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom) +{ + return container_of(dom, struct arm_smmu_domain, domain); +} + +static void parse_driver_options(struct arm_smmu_device *smmu) +{ + int i = 0; + + do { + if (of_property_read_bool(smmu->dev->of_node, + arm_smmu_options[i].prop)) { + smmu->options |= arm_smmu_options[i].opt; + dev_notice(smmu->dev, "option %s\n", + arm_smmu_options[i].prop); + } + } while (arm_smmu_options[++i].opt); +} + +static struct device_node *dev_get_dev_node(struct device *dev) +{ + if (dev_is_pci(dev)) { + struct pci_bus *bus = to_pci_dev(dev)->bus; + + while (!pci_is_root_bus(bus)) + bus = bus->parent; + return bus->bridge->parent->of_node; + } + + return dev->of_node; +} + +static struct arm_smmu_master *find_smmu_master(struct arm_smmu_device *smmu, + struct device_node *dev_node) +{ + struct rb_node *node = smmu->masters.rb_node; + + while (node) { + struct arm_smmu_master *master; + + master = container_of(node, struct arm_smmu_master, node); + + if (dev_node < master->of_node) + node = node->rb_left; + else if (dev_node > master->of_node) + node = node->rb_right; + else + return master; + } + + return NULL; +} + +static struct arm_smmu_master_cfg * +find_smmu_master_cfg(struct device *dev) +{ + struct arm_smmu_master_cfg *cfg = NULL; + struct iommu_group *group = iommu_group_get(dev); + + if (group) { + cfg = iommu_group_get_iommudata(group); + iommu_group_put(group); + } + + return cfg; +} + +static int insert_smmu_master(struct arm_smmu_device *smmu, + struct arm_smmu_master *master) +{ + struct rb_node **new, *parent; + + new = &smmu->masters.rb_node; + parent = NULL; + while (*new) { + struct arm_smmu_master *this + = container_of(*new, struct arm_smmu_master, node); + + parent = *new; + if (master->of_node < this->of_node) + new = &((*new)->rb_left); + else if (master->of_node > this->of_node) + new = &((*new)->rb_right); + else + return -EEXIST; + } + + rb_link_node(&master->node, parent, new); + rb_insert_color(&master->node, &smmu->masters); + return 0; +} + +static int register_smmu_master(struct arm_smmu_device *smmu, + struct device *dev, + struct of_phandle_args *masterspec) +{ + int i; + struct arm_smmu_master *master; + + master = find_smmu_master(smmu, masterspec->np); + if (master) { + dev_err(dev, + "rejecting multiple registrations for master device %s\n", + masterspec->np->name); + return -EBUSY; + } + + if (masterspec->args_count > MAX_MASTER_STREAMIDS) { + dev_err(dev, + "reached maximum number (%d) of stream IDs for master device %s\n", + MAX_MASTER_STREAMIDS, masterspec->np->name); + return -ENOSPC; + } + + master = devm_kzalloc(dev, sizeof(*master), GFP_KERNEL); + if (!master) + return -ENOMEM; + + master->of_node = masterspec->np; + master->cfg.num_streamids = masterspec->args_count; + + for (i = 0; i < master->cfg.num_streamids; ++i) { + u16 streamid = masterspec->args[i]; + + if (!(smmu->features & ARM_SMMU_FEAT_STREAM_MATCH) && + (streamid >= smmu->num_mapping_groups)) { + dev_err(dev, + "stream ID for master device %s greater than maximum allowed (%d)\n", + masterspec->np->name, smmu->num_mapping_groups); + return -ERANGE; + } + master->cfg.streamids[i] = streamid; + } + return insert_smmu_master(smmu, master); +} + +static struct arm_smmu_device *find_smmu_for_device(struct device *dev) +{ + struct arm_smmu_device *smmu; + struct arm_smmu_master *master = NULL; + struct device_node *dev_node = dev_get_dev_node(dev); + + spin_lock(&arm_smmu_devices_lock); + list_for_each_entry(smmu, &arm_smmu_devices, list) { + master = find_smmu_master(smmu, dev_node); + if (master) + break; + } + spin_unlock(&arm_smmu_devices_lock); + + return master ? smmu : NULL; +} + +static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end) +{ + int idx; + + do { + idx = find_next_zero_bit(map, end, start); + if (idx == end) + return -ENOSPC; + } while (test_and_set_bit(idx, map)); + + return idx; +} + +static void __arm_smmu_free_bitmap(unsigned long *map, int idx) +{ + clear_bit(idx, map); +} + +/* Wait for any pending TLB invalidations to complete */ +static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu) +{ + int count = 0; + void __iomem *gr0_base = ARM_SMMU_GR0(smmu); + + writel_relaxed(0, gr0_base + ARM_SMMU_GR0_sTLBGSYNC); + while (readl_relaxed(gr0_base + ARM_SMMU_GR0_sTLBGSTATUS) + & sTLBGSTATUS_GSACTIVE) { + cpu_relax(); + if (++count == TLB_LOOP_TIMEOUT) { + dev_err_ratelimited(smmu->dev, + "TLB sync timed out -- SMMU may be deadlocked\n"); + return; + } + udelay(1); + } +} + +static void arm_smmu_tlb_sync(void *cookie) +{ + struct arm_smmu_domain *smmu_domain = cookie; + __arm_smmu_tlb_sync(smmu_domain->smmu); +} + +static void arm_smmu_tlb_inv_context(void *cookie) +{ + struct arm_smmu_domain *smmu_domain = cookie; + struct arm_smmu_cfg *cfg = &smmu_domain->cfg; + struct arm_smmu_device *smmu = smmu_domain->smmu; + bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS; + void __iomem *base; + + if (stage1) { + base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); + writel_relaxed(ARM_SMMU_CB_ASID(cfg), + base + ARM_SMMU_CB_S1_TLBIASID); + } else { + base = ARM_SMMU_GR0(smmu); + writel_relaxed(ARM_SMMU_CB_VMID(cfg), + base + ARM_SMMU_GR0_TLBIVMID); + } + + __arm_smmu_tlb_sync(smmu); +} + +static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, + bool leaf, void *cookie) +{ + struct arm_smmu_domain *smmu_domain = cookie; + struct arm_smmu_cfg *cfg = &smmu_domain->cfg; + struct arm_smmu_device *smmu = smmu_domain->smmu; + bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS; + void __iomem *reg; + + if (stage1) { + reg = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); + reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA; + + if (!IS_ENABLED(CONFIG_64BIT) || smmu->version == ARM_SMMU_V1) { + iova &= ~12UL; + iova |= ARM_SMMU_CB_ASID(cfg); + writel_relaxed(iova, reg); +#ifdef CONFIG_64BIT + } else { + iova >>= 12; + iova |= (u64)ARM_SMMU_CB_ASID(cfg) << 48; + writeq_relaxed(iova, reg); +#endif + } +#ifdef CONFIG_64BIT + } else if (smmu->version == ARM_SMMU_V2) { + reg = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); + reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L : + ARM_SMMU_CB_S2_TLBIIPAS2; + writeq_relaxed(iova >> 12, reg); +#endif + } else { + reg = ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_TLBIVMID; + writel_relaxed(ARM_SMMU_CB_VMID(cfg), reg); + } +} + +static void arm_smmu_flush_pgtable(void *addr, size_t size, void *cookie) +{ + struct arm_smmu_domain *smmu_domain = cookie; + struct arm_smmu_device *smmu = smmu_domain->smmu; + unsigned long offset = (unsigned long)addr & ~PAGE_MASK; + + + /* Ensure new page tables are visible to the hardware walker */ + if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) { + dsb(ishst); + } else { + /* + * If the SMMU can't walk tables in the CPU caches, treat them + * like non-coherent DMA since we need to flush the new entries + * all the way out to memory. There's no possibility of + * recursion here as the SMMU table walker will not be wired + * through another SMMU. + */ + dma_map_page(smmu->dev, virt_to_page(addr), offset, size, + DMA_TO_DEVICE); + } +} + +static struct iommu_gather_ops arm_smmu_gather_ops = { + .tlb_flush_all = arm_smmu_tlb_inv_context, + .tlb_add_flush = arm_smmu_tlb_inv_range_nosync, + .tlb_sync = arm_smmu_tlb_sync, + .flush_pgtable = arm_smmu_flush_pgtable, +}; + +static irqreturn_t arm_smmu_context_fault(int irq, void *dev) +{ + int flags, ret; + u32 fsr, far, fsynr, resume; + unsigned long iova; + struct iommu_domain *domain = dev; + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + struct arm_smmu_cfg *cfg = &smmu_domain->cfg; + struct arm_smmu_device *smmu = smmu_domain->smmu; + void __iomem *cb_base; + + cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); + fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR); + + if (!(fsr & FSR_FAULT)) + return IRQ_NONE; + + if (fsr & FSR_IGN) + dev_err_ratelimited(smmu->dev, + "Unexpected context fault (fsr 0x%x)\n", + fsr); + + fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0); + flags = fsynr & FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ; + + far = readl_relaxed(cb_base + ARM_SMMU_CB_FAR_LO); + iova = far; +#ifdef CONFIG_64BIT + far = readl_relaxed(cb_base + ARM_SMMU_CB_FAR_HI); + iova |= ((unsigned long)far << 32); +#endif + + if (!report_iommu_fault(domain, smmu->dev, iova, flags)) { + ret = IRQ_HANDLED; + resume = RESUME_RETRY; + } else { + dev_err_ratelimited(smmu->dev, + "Unhandled context fault: iova=0x%08lx, fsynr=0x%x, cb=%d\n", + iova, fsynr, cfg->cbndx); + ret = IRQ_NONE; + resume = RESUME_TERMINATE; + } + + /* Clear the faulting FSR */ + writel(fsr, cb_base + ARM_SMMU_CB_FSR); + + /* Retry or terminate any stalled transactions */ + if (fsr & FSR_SS) + writel_relaxed(resume, cb_base + ARM_SMMU_CB_RESUME); + + return ret; +} + +static irqreturn_t arm_smmu_global_fault(int irq, void *dev) +{ + u32 gfsr, gfsynr0, gfsynr1, gfsynr2; + struct arm_smmu_device *smmu = dev; + void __iomem *gr0_base = ARM_SMMU_GR0_NS(smmu); + + gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR); + gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0); + gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1); + gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2); + + if (!gfsr) + return IRQ_NONE; + + dev_err_ratelimited(smmu->dev, + "Unexpected global fault, this could be serious\n"); + dev_err_ratelimited(smmu->dev, + "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n", + gfsr, gfsynr0, gfsynr1, gfsynr2); + + writel(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR); + return IRQ_HANDLED; +} + +static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, + struct io_pgtable_cfg *pgtbl_cfg) +{ + u32 reg; + bool stage1; + struct arm_smmu_cfg *cfg = &smmu_domain->cfg; + struct arm_smmu_device *smmu = smmu_domain->smmu; + void __iomem *cb_base, *gr0_base, *gr1_base; + + gr0_base = ARM_SMMU_GR0(smmu); + gr1_base = ARM_SMMU_GR1(smmu); + stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS; + cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); + + if (smmu->version > ARM_SMMU_V1) { + /* + * CBA2R. + * *Must* be initialised before CBAR thanks to VMID16 + * architectural oversight affected some implementations. + */ +#ifdef CONFIG_64BIT + reg = CBA2R_RW64_64BIT; +#else + reg = CBA2R_RW64_32BIT; +#endif + writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBA2R(cfg->cbndx)); + } + + /* CBAR */ + reg = cfg->cbar; + if (smmu->version == ARM_SMMU_V1) + reg |= cfg->irptndx << CBAR_IRPTNDX_SHIFT; + + /* + * Use the weakest shareability/memory types, so they are + * overridden by the ttbcr/pte. + */ + if (stage1) { + reg |= (CBAR_S1_BPSHCFG_NSH << CBAR_S1_BPSHCFG_SHIFT) | + (CBAR_S1_MEMATTR_WB << CBAR_S1_MEMATTR_SHIFT); + } else { + reg |= ARM_SMMU_CB_VMID(cfg) << CBAR_VMID_SHIFT; + } + writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(cfg->cbndx)); + + /* TTBRs */ + if (stage1) { + reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0]; + writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_LO); + reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0] >> 32; + reg |= ARM_SMMU_CB_ASID(cfg) << TTBRn_HI_ASID_SHIFT; + writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_HI); + + reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1]; + writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1_LO); + reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1] >> 32; + reg |= ARM_SMMU_CB_ASID(cfg) << TTBRn_HI_ASID_SHIFT; + writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1_HI); + } else { + reg = pgtbl_cfg->arm_lpae_s2_cfg.vttbr; + writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_LO); + reg = pgtbl_cfg->arm_lpae_s2_cfg.vttbr >> 32; + writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_HI); + } + + /* TTBCR */ + if (stage1) { + reg = pgtbl_cfg->arm_lpae_s1_cfg.tcr; + writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBCR); + if (smmu->version > ARM_SMMU_V1) { + reg = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32; + reg |= TTBCR2_SEP_UPSTREAM; + writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBCR2); + } + } else { + reg = pgtbl_cfg->arm_lpae_s2_cfg.vtcr; + writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBCR); + } + + /* MAIRs (stage-1 only) */ + if (stage1) { + reg = pgtbl_cfg->arm_lpae_s1_cfg.mair[0]; + writel_relaxed(reg, cb_base + ARM_SMMU_CB_S1_MAIR0); + reg = pgtbl_cfg->arm_lpae_s1_cfg.mair[1]; + writel_relaxed(reg, cb_base + ARM_SMMU_CB_S1_MAIR1); + } + + /* SCTLR */ + reg = SCTLR_CFCFG | SCTLR_CFIE | SCTLR_CFRE | SCTLR_M | SCTLR_EAE_SBOP; + if (stage1) + reg |= SCTLR_S1_ASIDPNE; +#ifdef __BIG_ENDIAN + reg |= SCTLR_E; +#endif + writel_relaxed(reg, cb_base + ARM_SMMU_CB_SCTLR); +} + +static int arm_smmu_init_domain_context(struct iommu_domain *domain, + struct arm_smmu_device *smmu) +{ + int irq, start, ret = 0; + unsigned long ias, oas; + struct io_pgtable_ops *pgtbl_ops; + struct io_pgtable_cfg pgtbl_cfg; + enum io_pgtable_fmt fmt; + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + struct arm_smmu_cfg *cfg = &smmu_domain->cfg; + + mutex_lock(&smmu_domain->init_mutex); + if (smmu_domain->smmu) + goto out_unlock; + + /* + * Mapping the requested stage onto what we support is surprisingly + * complicated, mainly because the spec allows S1+S2 SMMUs without + * support for nested translation. That means we end up with the + * following table: + * + * Requested Supported Actual + * S1 N S1 + * S1 S1+S2 S1 + * S1 S2 S2 + * S1 S1 S1 + * N N N + * N S1+S2 S2 + * N S2 S2 + * N S1 S1 + * + * Note that you can't actually request stage-2 mappings. + */ + if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1)) + smmu_domain->stage = ARM_SMMU_DOMAIN_S2; + if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2)) + smmu_domain->stage = ARM_SMMU_DOMAIN_S1; + + switch (smmu_domain->stage) { + case ARM_SMMU_DOMAIN_S1: + cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS; + start = smmu->num_s2_context_banks; + ias = smmu->va_size; + oas = smmu->ipa_size; + if (IS_ENABLED(CONFIG_64BIT)) + fmt = ARM_64_LPAE_S1; + else + fmt = ARM_32_LPAE_S1; + break; + case ARM_SMMU_DOMAIN_NESTED: + /* + * We will likely want to change this if/when KVM gets + * involved. + */ + case ARM_SMMU_DOMAIN_S2: + cfg->cbar = CBAR_TYPE_S2_TRANS; + start = 0; + ias = smmu->ipa_size; + oas = smmu->pa_size; + if (IS_ENABLED(CONFIG_64BIT)) + fmt = ARM_64_LPAE_S2; + else + fmt = ARM_32_LPAE_S2; + break; + default: + ret = -EINVAL; + goto out_unlock; + } + + ret = __arm_smmu_alloc_bitmap(smmu->context_map, start, + smmu->num_context_banks); + if (IS_ERR_VALUE(ret)) + goto out_unlock; + + cfg->cbndx = ret; + if (smmu->version == ARM_SMMU_V1) { + cfg->irptndx = atomic_inc_return(&smmu->irptndx); + cfg->irptndx %= smmu->num_context_irqs; + } else { + cfg->irptndx = cfg->cbndx; + } + + pgtbl_cfg = (struct io_pgtable_cfg) { + .pgsize_bitmap = arm_smmu_ops.pgsize_bitmap, + .ias = ias, + .oas = oas, + .tlb = &arm_smmu_gather_ops, + }; + + smmu_domain->smmu = smmu; + pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain); + if (!pgtbl_ops) { + ret = -ENOMEM; + goto out_clear_smmu; + } + + /* Update our support page sizes to reflect the page table format */ + arm_smmu_ops.pgsize_bitmap = pgtbl_cfg.pgsize_bitmap; + + /* Initialise the context bank with our page table cfg */ + arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg); + + /* + * Request context fault interrupt. Do this last to avoid the + * handler seeing a half-initialised domain state. + */ + irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx]; + ret = request_irq(irq, arm_smmu_context_fault, IRQF_SHARED, + "arm-smmu-context-fault", domain); + if (IS_ERR_VALUE(ret)) { + dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n", + cfg->irptndx, irq); + cfg->irptndx = INVALID_IRPTNDX; + } + + mutex_unlock(&smmu_domain->init_mutex); + + /* Publish page table ops for map/unmap */ + smmu_domain->pgtbl_ops = pgtbl_ops; + return 0; + +out_clear_smmu: + smmu_domain->smmu = NULL; +out_unlock: + mutex_unlock(&smmu_domain->init_mutex); + return ret; +} + +static void arm_smmu_destroy_domain_context(struct iommu_domain *domain) +{ + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + struct arm_smmu_device *smmu = smmu_domain->smmu; + struct arm_smmu_cfg *cfg = &smmu_domain->cfg; + void __iomem *cb_base; + int irq; + + if (!smmu) + return; + + /* + * Disable the context bank and free the page tables before freeing + * it. + */ + cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); + writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR); + + if (cfg->irptndx != INVALID_IRPTNDX) { + irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx]; + free_irq(irq, domain); + } + + if (smmu_domain->pgtbl_ops) + free_io_pgtable_ops(smmu_domain->pgtbl_ops); + + __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx); +} + +static struct iommu_domain *arm_smmu_domain_alloc(unsigned type) +{ + struct arm_smmu_domain *smmu_domain; + + if (type != IOMMU_DOMAIN_UNMANAGED) + return NULL; + /* + * Allocate the domain and initialise some of its data structures. + * We can't really do anything meaningful until we've added a + * master. + */ + smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL); + if (!smmu_domain) + return NULL; + + mutex_init(&smmu_domain->init_mutex); + spin_lock_init(&smmu_domain->pgtbl_lock); + + return &smmu_domain->domain; +} + +static void arm_smmu_domain_free(struct iommu_domain *domain) +{ + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + + /* + * Free the domain resources. We assume that all devices have + * already been detached. + */ + arm_smmu_destroy_domain_context(domain); + kfree(smmu_domain); +} + +static int arm_smmu_master_configure_smrs(struct arm_smmu_device *smmu, + struct arm_smmu_master_cfg *cfg) +{ + int i; + struct arm_smmu_smr *smrs; + void __iomem *gr0_base = ARM_SMMU_GR0(smmu); + + if (!(smmu->features & ARM_SMMU_FEAT_STREAM_MATCH)) + return 0; + + if (cfg->smrs) + return -EEXIST; + + smrs = kmalloc_array(cfg->num_streamids, sizeof(*smrs), GFP_KERNEL); + if (!smrs) { + dev_err(smmu->dev, "failed to allocate %d SMRs\n", + cfg->num_streamids); + return -ENOMEM; + } + + /* Allocate the SMRs on the SMMU */ + for (i = 0; i < cfg->num_streamids; ++i) { + int idx = __arm_smmu_alloc_bitmap(smmu->smr_map, 0, + smmu->num_mapping_groups); + if (IS_ERR_VALUE(idx)) { + dev_err(smmu->dev, "failed to allocate free SMR\n"); + goto err_free_smrs; + } + + smrs[i] = (struct arm_smmu_smr) { + .idx = idx, + .mask = 0, /* We don't currently share SMRs */ + .id = cfg->streamids[i], + }; + } + + /* It worked! Now, poke the actual hardware */ + for (i = 0; i < cfg->num_streamids; ++i) { + u32 reg = SMR_VALID | smrs[i].id << SMR_ID_SHIFT | + smrs[i].mask << SMR_MASK_SHIFT; + writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_SMR(smrs[i].idx)); + } + + cfg->smrs = smrs; + return 0; + +err_free_smrs: + while (--i >= 0) + __arm_smmu_free_bitmap(smmu->smr_map, smrs[i].idx); + kfree(smrs); + return -ENOSPC; +} + +static void arm_smmu_master_free_smrs(struct arm_smmu_device *smmu, + struct arm_smmu_master_cfg *cfg) +{ + int i; + void __iomem *gr0_base = ARM_SMMU_GR0(smmu); + struct arm_smmu_smr *smrs = cfg->smrs; + + if (!smrs) + return; + + /* Invalidate the SMRs before freeing back to the allocator */ + for (i = 0; i < cfg->num_streamids; ++i) { + u8 idx = smrs[i].idx; + + writel_relaxed(~SMR_VALID, gr0_base + ARM_SMMU_GR0_SMR(idx)); + __arm_smmu_free_bitmap(smmu->smr_map, idx); + } + + cfg->smrs = NULL; + kfree(smrs); +} + +static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain, + struct arm_smmu_master_cfg *cfg) +{ + int i, ret; + struct arm_smmu_device *smmu = smmu_domain->smmu; + void __iomem *gr0_base = ARM_SMMU_GR0(smmu); + + /* Devices in an IOMMU group may already be configured */ + ret = arm_smmu_master_configure_smrs(smmu, cfg); + if (ret) + return ret == -EEXIST ? 0 : ret; + + for (i = 0; i < cfg->num_streamids; ++i) { + u32 idx, s2cr; + + idx = cfg->smrs ? cfg->smrs[i].idx : cfg->streamids[i]; + s2cr = S2CR_TYPE_TRANS | + (smmu_domain->cfg.cbndx << S2CR_CBNDX_SHIFT); + writel_relaxed(s2cr, gr0_base + ARM_SMMU_GR0_S2CR(idx)); + } + + return 0; +} + +static void arm_smmu_domain_remove_master(struct arm_smmu_domain *smmu_domain, + struct arm_smmu_master_cfg *cfg) +{ + int i; + struct arm_smmu_device *smmu = smmu_domain->smmu; + void __iomem *gr0_base = ARM_SMMU_GR0(smmu); + + /* An IOMMU group is torn down by the first device to be removed */ + if ((smmu->features & ARM_SMMU_FEAT_STREAM_MATCH) && !cfg->smrs) + return; + + /* + * We *must* clear the S2CR first, because freeing the SMR means + * that it can be re-allocated immediately. + */ + for (i = 0; i < cfg->num_streamids; ++i) { + u32 idx = cfg->smrs ? cfg->smrs[i].idx : cfg->streamids[i]; + + writel_relaxed(S2CR_TYPE_BYPASS, + gr0_base + ARM_SMMU_GR0_S2CR(idx)); + } + + arm_smmu_master_free_smrs(smmu, cfg); +} + +static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) +{ + int ret; + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + struct arm_smmu_device *smmu; + struct arm_smmu_master_cfg *cfg; + + smmu = find_smmu_for_device(dev); + if (!smmu) { + dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n"); + return -ENXIO; + } + + if (dev->archdata.iommu) { + dev_err(dev, "already attached to IOMMU domain\n"); + return -EEXIST; + } + + /* Ensure that the domain is finalised */ + ret = arm_smmu_init_domain_context(domain, smmu); + if (IS_ERR_VALUE(ret)) + return ret; + + /* + * Sanity check the domain. We don't support domains across + * different SMMUs. + */ + if (smmu_domain->smmu != smmu) { + dev_err(dev, + "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n", + dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev)); + return -EINVAL; + } + + /* Looks ok, so add the device to the domain */ + cfg = find_smmu_master_cfg(dev); + if (!cfg) + return -ENODEV; + + ret = arm_smmu_domain_add_master(smmu_domain, cfg); + if (!ret) + dev->archdata.iommu = domain; + return ret; +} + +static void arm_smmu_detach_dev(struct iommu_domain *domain, struct device *dev) +{ + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + struct arm_smmu_master_cfg *cfg; + + cfg = find_smmu_master_cfg(dev); + if (!cfg) + return; + + dev->archdata.iommu = NULL; + arm_smmu_domain_remove_master(smmu_domain, cfg); +} + +static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot) +{ + int ret; + unsigned long flags; + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops; + + if (!ops) + return -ENODEV; + + spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags); + ret = ops->map(ops, iova, paddr, size, prot); + spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags); + return ret; +} + +static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, + size_t size) +{ + size_t ret; + unsigned long flags; + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops; + + if (!ops) + return 0; + + spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags); + ret = ops->unmap(ops, iova, size); + spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags); + return ret; +} + +static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, + dma_addr_t iova) +{ + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + struct arm_smmu_device *smmu = smmu_domain->smmu; + struct arm_smmu_cfg *cfg = &smmu_domain->cfg; + struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops; + struct device *dev = smmu->dev; + void __iomem *cb_base; + u32 tmp; + u64 phys; + + cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); + + if (smmu->version == 1) { + u32 reg = iova & ~0xfff; + writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_LO); + } else { + u32 reg = iova & ~0xfff; + writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_LO); + reg = ((u64)iova & ~0xfff) >> 32; + writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_HI); + } + + if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp, + !(tmp & ATSR_ACTIVE), 5, 50)) { + dev_err(dev, + "iova to phys timed out on 0x%pad. Falling back to software table walk.\n", + &iova); + return ops->iova_to_phys(ops, iova); + } + + phys = readl_relaxed(cb_base + ARM_SMMU_CB_PAR_LO); + phys |= ((u64)readl_relaxed(cb_base + ARM_SMMU_CB_PAR_HI)) << 32; + + if (phys & CB_PAR_F) { + dev_err(dev, "translation fault!\n"); + dev_err(dev, "PAR = 0x%llx\n", phys); + return 0; + } + + return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff); +} + +static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain, + dma_addr_t iova) +{ + phys_addr_t ret; + unsigned long flags; + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops; + + if (!ops) + return 0; + + spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags); + if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS && + smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { + ret = arm_smmu_iova_to_phys_hard(domain, iova); + } else { + ret = ops->iova_to_phys(ops, iova); + } + + spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags); + + return ret; +} + +static bool arm_smmu_capable(enum iommu_cap cap) +{ + switch (cap) { + case IOMMU_CAP_CACHE_COHERENCY: + /* + * Return true here as the SMMU can always send out coherent + * requests. + */ + return true; + case IOMMU_CAP_INTR_REMAP: + return true; /* MSIs are just memory writes */ + case IOMMU_CAP_NOEXEC: + return true; + default: + return false; + } +} + +static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data) +{ + *((u16 *)data) = alias; + return 0; /* Continue walking */ +} + +static void __arm_smmu_release_pci_iommudata(void *data) +{ + kfree(data); +} + +static int arm_smmu_add_pci_device(struct pci_dev *pdev) +{ + int i, ret; + u16 sid; + struct iommu_group *group; + struct arm_smmu_master_cfg *cfg; + + group = iommu_group_get_for_dev(&pdev->dev); + if (IS_ERR(group)) + return PTR_ERR(group); + + cfg = iommu_group_get_iommudata(group); + if (!cfg) { + cfg = kzalloc(sizeof(*cfg), GFP_KERNEL); + if (!cfg) { + ret = -ENOMEM; + goto out_put_group; + } + + iommu_group_set_iommudata(group, cfg, + __arm_smmu_release_pci_iommudata); + } + + if (cfg->num_streamids >= MAX_MASTER_STREAMIDS) { + ret = -ENOSPC; + goto out_put_group; + } + + /* + * Assume Stream ID == Requester ID for now. + * We need a way to describe the ID mappings in FDT. + */ + pci_for_each_dma_alias(pdev, __arm_smmu_get_pci_sid, &sid); + for (i = 0; i < cfg->num_streamids; ++i) + if (cfg->streamids[i] == sid) + break; + + /* Avoid duplicate SIDs, as this can lead to SMR conflicts */ + if (i == cfg->num_streamids) + cfg->streamids[cfg->num_streamids++] = sid; + + return 0; +out_put_group: + iommu_group_put(group); + return ret; +} + +static int arm_smmu_add_platform_device(struct device *dev) +{ + struct iommu_group *group; + struct arm_smmu_master *master; + struct arm_smmu_device *smmu = find_smmu_for_device(dev); + + if (!smmu) + return -ENODEV; + + master = find_smmu_master(smmu, dev->of_node); + if (!master) + return -ENODEV; + + /* No automatic group creation for platform devices */ + group = iommu_group_alloc(); + if (IS_ERR(group)) + return PTR_ERR(group); + + iommu_group_set_iommudata(group, &master->cfg, NULL); + return iommu_group_add_device(group, dev); +} + +static int arm_smmu_add_device(struct device *dev) +{ + if (dev_is_pci(dev)) + return arm_smmu_add_pci_device(to_pci_dev(dev)); + + return arm_smmu_add_platform_device(dev); +} + +static void arm_smmu_remove_device(struct device *dev) +{ + iommu_group_remove_device(dev); +} + +static int arm_smmu_domain_get_attr(struct iommu_domain *domain, + enum iommu_attr attr, void *data) +{ + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + + switch (attr) { + case DOMAIN_ATTR_NESTING: + *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED); + return 0; + default: + return -ENODEV; + } +} + +static int arm_smmu_domain_set_attr(struct iommu_domain *domain, + enum iommu_attr attr, void *data) +{ + int ret = 0; + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + + mutex_lock(&smmu_domain->init_mutex); + + switch (attr) { + case DOMAIN_ATTR_NESTING: + if (smmu_domain->smmu) { + ret = -EPERM; + goto out_unlock; + } + + if (*(int *)data) + smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED; + else + smmu_domain->stage = ARM_SMMU_DOMAIN_S1; + + break; + default: + ret = -ENODEV; + } + +out_unlock: + mutex_unlock(&smmu_domain->init_mutex); + return ret; +} + +static struct iommu_ops arm_smmu_ops = { + .capable = arm_smmu_capable, + .domain_alloc = arm_smmu_domain_alloc, + .domain_free = arm_smmu_domain_free, + .attach_dev = arm_smmu_attach_dev, + .detach_dev = arm_smmu_detach_dev, + .map = arm_smmu_map, + .unmap = arm_smmu_unmap, + .map_sg = default_iommu_map_sg, + .iova_to_phys = arm_smmu_iova_to_phys, + .add_device = arm_smmu_add_device, + .remove_device = arm_smmu_remove_device, + .domain_get_attr = arm_smmu_domain_get_attr, + .domain_set_attr = arm_smmu_domain_set_attr, + .pgsize_bitmap = -1UL, /* Restricted during device attach */ +}; + +static void arm_smmu_device_reset(struct arm_smmu_device *smmu) +{ + void __iomem *gr0_base = ARM_SMMU_GR0(smmu); + void __iomem *cb_base; + int i = 0; + u32 reg; + + /* clear global FSR */ + reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR); + writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR); + + /* Mark all SMRn as invalid and all S2CRn as bypass */ + for (i = 0; i < smmu->num_mapping_groups; ++i) { + writel_relaxed(0, gr0_base + ARM_SMMU_GR0_SMR(i)); + writel_relaxed(S2CR_TYPE_BYPASS, + gr0_base + ARM_SMMU_GR0_S2CR(i)); + } + + /* Make sure all context banks are disabled and clear CB_FSR */ + for (i = 0; i < smmu->num_context_banks; ++i) { + cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, i); + writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR); + writel_relaxed(FSR_FAULT, cb_base + ARM_SMMU_CB_FSR); + } + + /* Invalidate the TLB, just in case */ + writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLH); + writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH); + + reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0); + + /* Enable fault reporting */ + reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE); + + /* Disable TLB broadcasting. */ + reg |= (sCR0_VMIDPNE | sCR0_PTM); + + /* Enable client access, but bypass when no mapping is found */ + reg &= ~(sCR0_CLIENTPD | sCR0_USFCFG); + + /* Disable forced broadcasting */ + reg &= ~sCR0_FB; + + /* Don't upgrade barriers */ + reg &= ~(sCR0_BSU_MASK << sCR0_BSU_SHIFT); + + /* Push the button */ + __arm_smmu_tlb_sync(smmu); + writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0); +} + +static int arm_smmu_id_size_to_bits(int size) +{ + switch (size) { + case 0: + return 32; + case 1: + return 36; + case 2: + return 40; + case 3: + return 42; + case 4: + return 44; + case 5: + default: + return 48; + } +} + +static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) +{ + unsigned long size; + void __iomem *gr0_base = ARM_SMMU_GR0(smmu); + u32 id; + + dev_notice(smmu->dev, "probing hardware configuration...\n"); + dev_notice(smmu->dev, "SMMUv%d with:\n", smmu->version); + + /* ID0 */ + id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID0); + + /* Restrict available stages based on module parameter */ + if (force_stage == 1) + id &= ~(ID0_S2TS | ID0_NTS); + else if (force_stage == 2) + id &= ~(ID0_S1TS | ID0_NTS); + + if (id & ID0_S1TS) { + smmu->features |= ARM_SMMU_FEAT_TRANS_S1; + dev_notice(smmu->dev, "\tstage 1 translation\n"); + } + + if (id & ID0_S2TS) { + smmu->features |= ARM_SMMU_FEAT_TRANS_S2; + dev_notice(smmu->dev, "\tstage 2 translation\n"); + } + + if (id & ID0_NTS) { + smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED; + dev_notice(smmu->dev, "\tnested translation\n"); + } + + if (!(smmu->features & + (ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) { + dev_err(smmu->dev, "\tno translation support!\n"); + return -ENODEV; + } + + if ((id & ID0_S1TS) && ((smmu->version == 1) || !(id & ID0_ATOSNS))) { + smmu->features |= ARM_SMMU_FEAT_TRANS_OPS; + dev_notice(smmu->dev, "\taddress translation ops\n"); + } + + if (id & ID0_CTTW) { + smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK; + dev_notice(smmu->dev, "\tcoherent table walk\n"); + } + + if (id & ID0_SMS) { + u32 smr, sid, mask; + + smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH; + smmu->num_mapping_groups = (id >> ID0_NUMSMRG_SHIFT) & + ID0_NUMSMRG_MASK; + if (smmu->num_mapping_groups == 0) { + dev_err(smmu->dev, + "stream-matching supported, but no SMRs present!\n"); + return -ENODEV; + } + + smr = SMR_MASK_MASK << SMR_MASK_SHIFT; + smr |= (SMR_ID_MASK << SMR_ID_SHIFT); + writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0)); + smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0)); + + mask = (smr >> SMR_MASK_SHIFT) & SMR_MASK_MASK; + sid = (smr >> SMR_ID_SHIFT) & SMR_ID_MASK; + if ((mask & sid) != sid) { + dev_err(smmu->dev, + "SMR mask bits (0x%x) insufficient for ID field (0x%x)\n", + mask, sid); + return -ENODEV; + } + + dev_notice(smmu->dev, + "\tstream matching with %u register groups, mask 0x%x", + smmu->num_mapping_groups, mask); + } else { + smmu->num_mapping_groups = (id >> ID0_NUMSIDB_SHIFT) & + ID0_NUMSIDB_MASK; + } + + /* ID1 */ + id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID1); + smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12; + + /* Check for size mismatch of SMMU address space from mapped region */ + size = 1 << (((id >> ID1_NUMPAGENDXB_SHIFT) & ID1_NUMPAGENDXB_MASK) + 1); + size *= 2 << smmu->pgshift; + if (smmu->size != size) + dev_warn(smmu->dev, + "SMMU address space size (0x%lx) differs from mapped region size (0x%lx)!\n", + size, smmu->size); + + smmu->num_s2_context_banks = (id >> ID1_NUMS2CB_SHIFT) & ID1_NUMS2CB_MASK; + smmu->num_context_banks = (id >> ID1_NUMCB_SHIFT) & ID1_NUMCB_MASK; + if (smmu->num_s2_context_banks > smmu->num_context_banks) { + dev_err(smmu->dev, "impossible number of S2 context banks!\n"); + return -ENODEV; + } + dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n", + smmu->num_context_banks, smmu->num_s2_context_banks); + + /* ID2 */ + id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID2); + size = arm_smmu_id_size_to_bits((id >> ID2_IAS_SHIFT) & ID2_IAS_MASK); + smmu->ipa_size = size; + + /* The output mask is also applied for bypass */ + size = arm_smmu_id_size_to_bits((id >> ID2_OAS_SHIFT) & ID2_OAS_MASK); + smmu->pa_size = size; + + /* + * What the page table walker can address actually depends on which + * descriptor format is in use, but since a) we don't know that yet, + * and b) it can vary per context bank, this will have to do... + */ + if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size))) + dev_warn(smmu->dev, + "failed to set DMA mask for table walker\n"); + + if (smmu->version == ARM_SMMU_V1) { + smmu->va_size = smmu->ipa_size; + size = SZ_4K | SZ_2M | SZ_1G; + } else { + size = (id >> ID2_UBS_SHIFT) & ID2_UBS_MASK; + smmu->va_size = arm_smmu_id_size_to_bits(size); +#ifndef CONFIG_64BIT + smmu->va_size = min(32UL, smmu->va_size); +#endif + size = 0; + if (id & ID2_PTFS_4K) + size |= SZ_4K | SZ_2M | SZ_1G; + if (id & ID2_PTFS_16K) + size |= SZ_16K | SZ_32M; + if (id & ID2_PTFS_64K) + size |= SZ_64K | SZ_512M; + } + + arm_smmu_ops.pgsize_bitmap &= size; + dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n", size); + + if (smmu->features & ARM_SMMU_FEAT_TRANS_S1) + dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n", + smmu->va_size, smmu->ipa_size); + + if (smmu->features & ARM_SMMU_FEAT_TRANS_S2) + dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n", + smmu->ipa_size, smmu->pa_size); + + return 0; +} + +static const struct of_device_id arm_smmu_of_match[] = { + { .compatible = "arm,smmu-v1", .data = (void *)ARM_SMMU_V1 }, + { .compatible = "arm,smmu-v2", .data = (void *)ARM_SMMU_V2 }, + { .compatible = "arm,mmu-400", .data = (void *)ARM_SMMU_V1 }, + { .compatible = "arm,mmu-401", .data = (void *)ARM_SMMU_V1 }, + { .compatible = "arm,mmu-500", .data = (void *)ARM_SMMU_V2 }, + { }, +}; +MODULE_DEVICE_TABLE(of, arm_smmu_of_match); + +static int arm_smmu_device_dt_probe(struct platform_device *pdev) +{ + const struct of_device_id *of_id; + struct resource *res; + struct arm_smmu_device *smmu; + struct device *dev = &pdev->dev; + struct rb_node *node; + struct of_phandle_args masterspec; + int num_irqs, i, err; + + smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL); + if (!smmu) { + dev_err(dev, "failed to allocate arm_smmu_device\n"); + return -ENOMEM; + } + smmu->dev = dev; + + of_id = of_match_node(arm_smmu_of_match, dev->of_node); + smmu->version = (enum arm_smmu_arch_version)of_id->data; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + smmu->base = devm_ioremap_resource(dev, res); + if (IS_ERR(smmu->base)) + return PTR_ERR(smmu->base); + smmu->size = resource_size(res); + + if (of_property_read_u32(dev->of_node, "#global-interrupts", + &smmu->num_global_irqs)) { + dev_err(dev, "missing #global-interrupts property\n"); + return -ENODEV; + } + + num_irqs = 0; + while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) { + num_irqs++; + if (num_irqs > smmu->num_global_irqs) + smmu->num_context_irqs++; + } + + if (!smmu->num_context_irqs) { + dev_err(dev, "found %d interrupts but expected at least %d\n", + num_irqs, smmu->num_global_irqs + 1); + return -ENODEV; + } + + smmu->irqs = devm_kzalloc(dev, sizeof(*smmu->irqs) * num_irqs, + GFP_KERNEL); + if (!smmu->irqs) { + dev_err(dev, "failed to allocate %d irqs\n", num_irqs); + return -ENOMEM; + } + + for (i = 0; i < num_irqs; ++i) { + int irq = platform_get_irq(pdev, i); + + if (irq < 0) { + dev_err(dev, "failed to get irq index %d\n", i); + return -ENODEV; + } + smmu->irqs[i] = irq; + } + + err = arm_smmu_device_cfg_probe(smmu); + if (err) + return err; + + i = 0; + smmu->masters = RB_ROOT; + while (!of_parse_phandle_with_args(dev->of_node, "mmu-masters", + "#stream-id-cells", i, + &masterspec)) { + err = register_smmu_master(smmu, dev, &masterspec); + if (err) { + dev_err(dev, "failed to add master %s\n", + masterspec.np->name); + goto out_put_masters; + } + + i++; + } + dev_notice(dev, "registered %d master devices\n", i); + + parse_driver_options(smmu); + + if (smmu->version > ARM_SMMU_V1 && + smmu->num_context_banks != smmu->num_context_irqs) { + dev_err(dev, + "found only %d context interrupt(s) but %d required\n", + smmu->num_context_irqs, smmu->num_context_banks); + err = -ENODEV; + goto out_put_masters; + } + + for (i = 0; i < smmu->num_global_irqs; ++i) { + err = request_irq(smmu->irqs[i], + arm_smmu_global_fault, + IRQF_SHARED, + "arm-smmu global fault", + smmu); + if (err) { + dev_err(dev, "failed to request global IRQ %d (%u)\n", + i, smmu->irqs[i]); + goto out_free_irqs; + } + } + + INIT_LIST_HEAD(&smmu->list); + spin_lock(&arm_smmu_devices_lock); + list_add(&smmu->list, &arm_smmu_devices); + spin_unlock(&arm_smmu_devices_lock); + + arm_smmu_device_reset(smmu); + return 0; + +out_free_irqs: + while (i--) + free_irq(smmu->irqs[i], smmu); + +out_put_masters: + for (node = rb_first(&smmu->masters); node; node = rb_next(node)) { + struct arm_smmu_master *master + = container_of(node, struct arm_smmu_master, node); + of_node_put(master->of_node); + } + + return err; +} + +static int arm_smmu_device_remove(struct platform_device *pdev) +{ + int i; + struct device *dev = &pdev->dev; + struct arm_smmu_device *curr, *smmu = NULL; + struct rb_node *node; + + spin_lock(&arm_smmu_devices_lock); + list_for_each_entry(curr, &arm_smmu_devices, list) { + if (curr->dev == dev) { + smmu = curr; + list_del(&smmu->list); + break; + } + } + spin_unlock(&arm_smmu_devices_lock); + + if (!smmu) + return -ENODEV; + + for (node = rb_first(&smmu->masters); node; node = rb_next(node)) { + struct arm_smmu_master *master + = container_of(node, struct arm_smmu_master, node); + of_node_put(master->of_node); + } + + if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS)) + dev_err(dev, "removing device with active domains!\n"); + + for (i = 0; i < smmu->num_global_irqs; ++i) + free_irq(smmu->irqs[i], smmu); + + /* Turn the thing off */ + writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0); + return 0; +} + +static struct platform_driver arm_smmu_driver = { + .driver = { + .name = "arm-smmu", + .of_match_table = of_match_ptr(arm_smmu_of_match), + }, + .probe = arm_smmu_device_dt_probe, + .remove = arm_smmu_device_remove, +}; + +static int __init arm_smmu_init(void) +{ + struct device_node *np; + int ret; + + /* + * Play nice with systems that don't have an ARM SMMU by checking that + * an ARM SMMU exists in the system before proceeding with the driver + * and IOMMU bus operation registration. + */ + np = of_find_matching_node(NULL, arm_smmu_of_match); + if (!np) + return 0; + + of_node_put(np); + + ret = platform_driver_register(&arm_smmu_driver); + if (ret) + return ret; + + /* Oh, for a proper bus abstraction */ + if (!iommu_present(&platform_bus_type)) + bus_set_iommu(&platform_bus_type, &arm_smmu_ops); + +#ifdef CONFIG_ARM_AMBA + if (!iommu_present(&amba_bustype)) + bus_set_iommu(&amba_bustype, &arm_smmu_ops); +#endif + +#ifdef CONFIG_PCI + if (!iommu_present(&pci_bus_type)) + bus_set_iommu(&pci_bus_type, &arm_smmu_ops); +#endif + + return 0; +} + +static void __exit arm_smmu_exit(void) +{ + return platform_driver_unregister(&arm_smmu_driver); +} + +subsys_initcall(arm_smmu_init); +module_exit(arm_smmu_exit); + +MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations"); +MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c new file mode 100644 index 000000000..984761308 --- /dev/null +++ b/drivers/iommu/dmar.c @@ -0,0 +1,2000 @@ +/* + * Copyright (c) 2006, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Copyright (C) 2006-2008 Intel Corporation + * Author: Ashok Raj <ashok.raj@intel.com> + * Author: Shaohua Li <shaohua.li@intel.com> + * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> + * + * This file implements early detection/parsing of Remapping Devices + * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI + * tables. + * + * These routines are used by both DMA-remapping and Interrupt-remapping + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt /* has to precede printk.h */ + +#include <linux/pci.h> +#include <linux/dmar.h> +#include <linux/iova.h> +#include <linux/intel-iommu.h> +#include <linux/timer.h> +#include <linux/irq.h> +#include <linux/interrupt.h> +#include <linux/tboot.h> +#include <linux/dmi.h> +#include <linux/slab.h> +#include <linux/iommu.h> +#include <asm/irq_remapping.h> +#include <asm/iommu_table.h> + +#include "irq_remapping.h" + +typedef int (*dmar_res_handler_t)(struct acpi_dmar_header *, void *); +struct dmar_res_callback { + dmar_res_handler_t cb[ACPI_DMAR_TYPE_RESERVED]; + void *arg[ACPI_DMAR_TYPE_RESERVED]; + bool ignore_unhandled; + bool print_entry; +}; + +/* + * Assumptions: + * 1) The hotplug framework guarentees that DMAR unit will be hot-added + * before IO devices managed by that unit. + * 2) The hotplug framework guarantees that DMAR unit will be hot-removed + * after IO devices managed by that unit. + * 3) Hotplug events are rare. + * + * Locking rules for DMA and interrupt remapping related global data structures: + * 1) Use dmar_global_lock in process context + * 2) Use RCU in interrupt context + */ +DECLARE_RWSEM(dmar_global_lock); +LIST_HEAD(dmar_drhd_units); + +struct acpi_table_header * __initdata dmar_tbl; +static acpi_size dmar_tbl_size; +static int dmar_dev_scope_status = 1; +static unsigned long dmar_seq_ids[BITS_TO_LONGS(DMAR_UNITS_SUPPORTED)]; + +static int alloc_iommu(struct dmar_drhd_unit *drhd); +static void free_iommu(struct intel_iommu *iommu); + +static void dmar_register_drhd_unit(struct dmar_drhd_unit *drhd) +{ + /* + * add INCLUDE_ALL at the tail, so scan the list will find it at + * the very end. + */ + if (drhd->include_all) + list_add_tail_rcu(&drhd->list, &dmar_drhd_units); + else + list_add_rcu(&drhd->list, &dmar_drhd_units); +} + +void *dmar_alloc_dev_scope(void *start, void *end, int *cnt) +{ + struct acpi_dmar_device_scope *scope; + + *cnt = 0; + while (start < end) { + scope = start; + if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_NAMESPACE || + scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT || + scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) + (*cnt)++; + else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC && + scope->entry_type != ACPI_DMAR_SCOPE_TYPE_HPET) { + pr_warn("Unsupported device scope\n"); + } + start += scope->length; + } + if (*cnt == 0) + return NULL; + + return kcalloc(*cnt, sizeof(struct dmar_dev_scope), GFP_KERNEL); +} + +void dmar_free_dev_scope(struct dmar_dev_scope **devices, int *cnt) +{ + int i; + struct device *tmp_dev; + + if (*devices && *cnt) { + for_each_active_dev_scope(*devices, *cnt, i, tmp_dev) + put_device(tmp_dev); + kfree(*devices); + } + + *devices = NULL; + *cnt = 0; +} + +/* Optimize out kzalloc()/kfree() for normal cases */ +static char dmar_pci_notify_info_buf[64]; + +static struct dmar_pci_notify_info * +dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event) +{ + int level = 0; + size_t size; + struct pci_dev *tmp; + struct dmar_pci_notify_info *info; + + BUG_ON(dev->is_virtfn); + + /* Only generate path[] for device addition event */ + if (event == BUS_NOTIFY_ADD_DEVICE) + for (tmp = dev; tmp; tmp = tmp->bus->self) + level++; + + size = sizeof(*info) + level * sizeof(struct acpi_dmar_pci_path); + if (size <= sizeof(dmar_pci_notify_info_buf)) { + info = (struct dmar_pci_notify_info *)dmar_pci_notify_info_buf; + } else { + info = kzalloc(size, GFP_KERNEL); + if (!info) { + pr_warn("Out of memory when allocating notify_info " + "for %s.\n", pci_name(dev)); + if (dmar_dev_scope_status == 0) + dmar_dev_scope_status = -ENOMEM; + return NULL; + } + } + + info->event = event; + info->dev = dev; + info->seg = pci_domain_nr(dev->bus); + info->level = level; + if (event == BUS_NOTIFY_ADD_DEVICE) { + for (tmp = dev; tmp; tmp = tmp->bus->self) { + level--; + info->path[level].bus = tmp->bus->number; + info->path[level].device = PCI_SLOT(tmp->devfn); + info->path[level].function = PCI_FUNC(tmp->devfn); + if (pci_is_root_bus(tmp->bus)) + info->bus = tmp->bus->number; + } + } + + return info; +} + +static inline void dmar_free_pci_notify_info(struct dmar_pci_notify_info *info) +{ + if ((void *)info != dmar_pci_notify_info_buf) + kfree(info); +} + +static bool dmar_match_pci_path(struct dmar_pci_notify_info *info, int bus, + struct acpi_dmar_pci_path *path, int count) +{ + int i; + + if (info->bus != bus) + goto fallback; + if (info->level != count) + goto fallback; + + for (i = 0; i < count; i++) { + if (path[i].device != info->path[i].device || + path[i].function != info->path[i].function) + goto fallback; + } + + return true; + +fallback: + + if (count != 1) + return false; + + i = info->level - 1; + if (bus == info->path[i].bus && + path[0].device == info->path[i].device && + path[0].function == info->path[i].function) { + pr_info(FW_BUG "RMRR entry for device %02x:%02x.%x is broken - applying workaround\n", + bus, path[0].device, path[0].function); + return true; + } + + return false; +} + +/* Return: > 0 if match found, 0 if no match found, < 0 if error happens */ +int dmar_insert_dev_scope(struct dmar_pci_notify_info *info, + void *start, void*end, u16 segment, + struct dmar_dev_scope *devices, + int devices_cnt) +{ + int i, level; + struct device *tmp, *dev = &info->dev->dev; + struct acpi_dmar_device_scope *scope; + struct acpi_dmar_pci_path *path; + + if (segment != info->seg) + return 0; + + for (; start < end; start += scope->length) { + scope = start; + if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_ENDPOINT && + scope->entry_type != ACPI_DMAR_SCOPE_TYPE_BRIDGE) + continue; + + path = (struct acpi_dmar_pci_path *)(scope + 1); + level = (scope->length - sizeof(*scope)) / sizeof(*path); + if (!dmar_match_pci_path(info, scope->bus, path, level)) + continue; + + if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT) ^ + (info->dev->hdr_type == PCI_HEADER_TYPE_NORMAL)) { + pr_warn("Device scope type does not match for %s\n", + pci_name(info->dev)); + return -EINVAL; + } + + for_each_dev_scope(devices, devices_cnt, i, tmp) + if (tmp == NULL) { + devices[i].bus = info->dev->bus->number; + devices[i].devfn = info->dev->devfn; + rcu_assign_pointer(devices[i].dev, + get_device(dev)); + return 1; + } + BUG_ON(i >= devices_cnt); + } + + return 0; +} + +int dmar_remove_dev_scope(struct dmar_pci_notify_info *info, u16 segment, + struct dmar_dev_scope *devices, int count) +{ + int index; + struct device *tmp; + + if (info->seg != segment) + return 0; + + for_each_active_dev_scope(devices, count, index, tmp) + if (tmp == &info->dev->dev) { + RCU_INIT_POINTER(devices[index].dev, NULL); + synchronize_rcu(); + put_device(tmp); + return 1; + } + + return 0; +} + +static int dmar_pci_bus_add_dev(struct dmar_pci_notify_info *info) +{ + int ret = 0; + struct dmar_drhd_unit *dmaru; + struct acpi_dmar_hardware_unit *drhd; + + for_each_drhd_unit(dmaru) { + if (dmaru->include_all) + continue; + + drhd = container_of(dmaru->hdr, + struct acpi_dmar_hardware_unit, header); + ret = dmar_insert_dev_scope(info, (void *)(drhd + 1), + ((void *)drhd) + drhd->header.length, + dmaru->segment, + dmaru->devices, dmaru->devices_cnt); + if (ret != 0) + break; + } + if (ret >= 0) + ret = dmar_iommu_notify_scope_dev(info); + if (ret < 0 && dmar_dev_scope_status == 0) + dmar_dev_scope_status = ret; + + return ret; +} + +static void dmar_pci_bus_del_dev(struct dmar_pci_notify_info *info) +{ + struct dmar_drhd_unit *dmaru; + + for_each_drhd_unit(dmaru) + if (dmar_remove_dev_scope(info, dmaru->segment, + dmaru->devices, dmaru->devices_cnt)) + break; + dmar_iommu_notify_scope_dev(info); +} + +static int dmar_pci_bus_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct pci_dev *pdev = to_pci_dev(data); + struct dmar_pci_notify_info *info; + + /* Only care about add/remove events for physical functions */ + if (pdev->is_virtfn) + return NOTIFY_DONE; + if (action != BUS_NOTIFY_ADD_DEVICE && action != BUS_NOTIFY_DEL_DEVICE) + return NOTIFY_DONE; + + info = dmar_alloc_pci_notify_info(pdev, action); + if (!info) + return NOTIFY_DONE; + + down_write(&dmar_global_lock); + if (action == BUS_NOTIFY_ADD_DEVICE) + dmar_pci_bus_add_dev(info); + else if (action == BUS_NOTIFY_DEL_DEVICE) + dmar_pci_bus_del_dev(info); + up_write(&dmar_global_lock); + + dmar_free_pci_notify_info(info); + + return NOTIFY_OK; +} + +static struct notifier_block dmar_pci_bus_nb = { + .notifier_call = dmar_pci_bus_notifier, + .priority = INT_MIN, +}; + +static struct dmar_drhd_unit * +dmar_find_dmaru(struct acpi_dmar_hardware_unit *drhd) +{ + struct dmar_drhd_unit *dmaru; + + list_for_each_entry_rcu(dmaru, &dmar_drhd_units, list) + if (dmaru->segment == drhd->segment && + dmaru->reg_base_addr == drhd->address) + return dmaru; + + return NULL; +} + +/** + * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition + * structure which uniquely represent one DMA remapping hardware unit + * present in the platform + */ +static int dmar_parse_one_drhd(struct acpi_dmar_header *header, void *arg) +{ + struct acpi_dmar_hardware_unit *drhd; + struct dmar_drhd_unit *dmaru; + int ret = 0; + + drhd = (struct acpi_dmar_hardware_unit *)header; + dmaru = dmar_find_dmaru(drhd); + if (dmaru) + goto out; + + dmaru = kzalloc(sizeof(*dmaru) + header->length, GFP_KERNEL); + if (!dmaru) + return -ENOMEM; + + /* + * If header is allocated from slab by ACPI _DSM method, we need to + * copy the content because the memory buffer will be freed on return. + */ + dmaru->hdr = (void *)(dmaru + 1); + memcpy(dmaru->hdr, header, header->length); + dmaru->reg_base_addr = drhd->address; + dmaru->segment = drhd->segment; + dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */ + dmaru->devices = dmar_alloc_dev_scope((void *)(drhd + 1), + ((void *)drhd) + drhd->header.length, + &dmaru->devices_cnt); + if (dmaru->devices_cnt && dmaru->devices == NULL) { + kfree(dmaru); + return -ENOMEM; + } + + ret = alloc_iommu(dmaru); + if (ret) { + dmar_free_dev_scope(&dmaru->devices, + &dmaru->devices_cnt); + kfree(dmaru); + return ret; + } + dmar_register_drhd_unit(dmaru); + +out: + if (arg) + (*(int *)arg)++; + + return 0; +} + +static void dmar_free_drhd(struct dmar_drhd_unit *dmaru) +{ + if (dmaru->devices && dmaru->devices_cnt) + dmar_free_dev_scope(&dmaru->devices, &dmaru->devices_cnt); + if (dmaru->iommu) + free_iommu(dmaru->iommu); + kfree(dmaru); +} + +static int __init dmar_parse_one_andd(struct acpi_dmar_header *header, + void *arg) +{ + struct acpi_dmar_andd *andd = (void *)header; + + /* Check for NUL termination within the designated length */ + if (strnlen(andd->device_name, header->length - 8) == header->length - 8) { + WARN_TAINT(1, TAINT_FIRMWARE_WORKAROUND, + "Your BIOS is broken; ANDD object name is not NUL-terminated\n" + "BIOS vendor: %s; Ver: %s; Product Version: %s\n", + dmi_get_system_info(DMI_BIOS_VENDOR), + dmi_get_system_info(DMI_BIOS_VERSION), + dmi_get_system_info(DMI_PRODUCT_VERSION)); + return -EINVAL; + } + pr_info("ANDD device: %x name: %s\n", andd->device_number, + andd->device_name); + + return 0; +} + +#ifdef CONFIG_ACPI_NUMA +static int dmar_parse_one_rhsa(struct acpi_dmar_header *header, void *arg) +{ + struct acpi_dmar_rhsa *rhsa; + struct dmar_drhd_unit *drhd; + + rhsa = (struct acpi_dmar_rhsa *)header; + for_each_drhd_unit(drhd) { + if (drhd->reg_base_addr == rhsa->base_address) { + int node = acpi_map_pxm_to_node(rhsa->proximity_domain); + + if (!node_online(node)) + node = -1; + drhd->iommu->node = node; + return 0; + } + } + WARN_TAINT( + 1, TAINT_FIRMWARE_WORKAROUND, + "Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n" + "BIOS vendor: %s; Ver: %s; Product Version: %s\n", + drhd->reg_base_addr, + dmi_get_system_info(DMI_BIOS_VENDOR), + dmi_get_system_info(DMI_BIOS_VERSION), + dmi_get_system_info(DMI_PRODUCT_VERSION)); + + return 0; +} +#else +#define dmar_parse_one_rhsa dmar_res_noop +#endif + +static void __init +dmar_table_print_dmar_entry(struct acpi_dmar_header *header) +{ + struct acpi_dmar_hardware_unit *drhd; + struct acpi_dmar_reserved_memory *rmrr; + struct acpi_dmar_atsr *atsr; + struct acpi_dmar_rhsa *rhsa; + + switch (header->type) { + case ACPI_DMAR_TYPE_HARDWARE_UNIT: + drhd = container_of(header, struct acpi_dmar_hardware_unit, + header); + pr_info("DRHD base: %#016Lx flags: %#x\n", + (unsigned long long)drhd->address, drhd->flags); + break; + case ACPI_DMAR_TYPE_RESERVED_MEMORY: + rmrr = container_of(header, struct acpi_dmar_reserved_memory, + header); + pr_info("RMRR base: %#016Lx end: %#016Lx\n", + (unsigned long long)rmrr->base_address, + (unsigned long long)rmrr->end_address); + break; + case ACPI_DMAR_TYPE_ROOT_ATS: + atsr = container_of(header, struct acpi_dmar_atsr, header); + pr_info("ATSR flags: %#x\n", atsr->flags); + break; + case ACPI_DMAR_TYPE_HARDWARE_AFFINITY: + rhsa = container_of(header, struct acpi_dmar_rhsa, header); + pr_info("RHSA base: %#016Lx proximity domain: %#x\n", + (unsigned long long)rhsa->base_address, + rhsa->proximity_domain); + break; + case ACPI_DMAR_TYPE_NAMESPACE: + /* We don't print this here because we need to sanity-check + it first. So print it in dmar_parse_one_andd() instead. */ + break; + } +} + +/** + * dmar_table_detect - checks to see if the platform supports DMAR devices + */ +static int __init dmar_table_detect(void) +{ + acpi_status status = AE_OK; + + /* if we could find DMAR table, then there are DMAR devices */ + status = acpi_get_table_with_size(ACPI_SIG_DMAR, 0, + (struct acpi_table_header **)&dmar_tbl, + &dmar_tbl_size); + + if (ACPI_SUCCESS(status) && !dmar_tbl) { + pr_warn("Unable to map DMAR\n"); + status = AE_NOT_FOUND; + } + + return (ACPI_SUCCESS(status) ? 1 : 0); +} + +static int dmar_walk_remapping_entries(struct acpi_dmar_header *start, + size_t len, struct dmar_res_callback *cb) +{ + int ret = 0; + struct acpi_dmar_header *iter, *next; + struct acpi_dmar_header *end = ((void *)start) + len; + + for (iter = start; iter < end && ret == 0; iter = next) { + next = (void *)iter + iter->length; + if (iter->length == 0) { + /* Avoid looping forever on bad ACPI tables */ + pr_debug(FW_BUG "Invalid 0-length structure\n"); + break; + } else if (next > end) { + /* Avoid passing table end */ + pr_warn(FW_BUG "record passes table end\n"); + ret = -EINVAL; + break; + } + + if (cb->print_entry) + dmar_table_print_dmar_entry(iter); + + if (iter->type >= ACPI_DMAR_TYPE_RESERVED) { + /* continue for forward compatibility */ + pr_debug("Unknown DMAR structure type %d\n", + iter->type); + } else if (cb->cb[iter->type]) { + ret = cb->cb[iter->type](iter, cb->arg[iter->type]); + } else if (!cb->ignore_unhandled) { + pr_warn("No handler for DMAR structure type %d\n", + iter->type); + ret = -EINVAL; + } + } + + return ret; +} + +static inline int dmar_walk_dmar_table(struct acpi_table_dmar *dmar, + struct dmar_res_callback *cb) +{ + return dmar_walk_remapping_entries((void *)(dmar + 1), + dmar->header.length - sizeof(*dmar), cb); +} + +/** + * parse_dmar_table - parses the DMA reporting table + */ +static int __init +parse_dmar_table(void) +{ + struct acpi_table_dmar *dmar; + int ret = 0; + int drhd_count = 0; + struct dmar_res_callback cb = { + .print_entry = true, + .ignore_unhandled = true, + .arg[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &drhd_count, + .cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_parse_one_drhd, + .cb[ACPI_DMAR_TYPE_RESERVED_MEMORY] = &dmar_parse_one_rmrr, + .cb[ACPI_DMAR_TYPE_ROOT_ATS] = &dmar_parse_one_atsr, + .cb[ACPI_DMAR_TYPE_HARDWARE_AFFINITY] = &dmar_parse_one_rhsa, + .cb[ACPI_DMAR_TYPE_NAMESPACE] = &dmar_parse_one_andd, + }; + + /* + * Do it again, earlier dmar_tbl mapping could be mapped with + * fixed map. + */ + dmar_table_detect(); + + /* + * ACPI tables may not be DMA protected by tboot, so use DMAR copy + * SINIT saved in SinitMleData in TXT heap (which is DMA protected) + */ + dmar_tbl = tboot_get_dmar_table(dmar_tbl); + + dmar = (struct acpi_table_dmar *)dmar_tbl; + if (!dmar) + return -ENODEV; + + if (dmar->width < PAGE_SHIFT - 1) { + pr_warn("Invalid DMAR haw\n"); + return -EINVAL; + } + + pr_info("Host address width %d\n", dmar->width + 1); + ret = dmar_walk_dmar_table(dmar, &cb); + if (ret == 0 && drhd_count == 0) + pr_warn(FW_BUG "No DRHD structure found in DMAR table\n"); + + return ret; +} + +static int dmar_pci_device_match(struct dmar_dev_scope devices[], + int cnt, struct pci_dev *dev) +{ + int index; + struct device *tmp; + + while (dev) { + for_each_active_dev_scope(devices, cnt, index, tmp) + if (dev_is_pci(tmp) && dev == to_pci_dev(tmp)) + return 1; + + /* Check our parent */ + dev = dev->bus->self; + } + + return 0; +} + +struct dmar_drhd_unit * +dmar_find_matched_drhd_unit(struct pci_dev *dev) +{ + struct dmar_drhd_unit *dmaru; + struct acpi_dmar_hardware_unit *drhd; + + dev = pci_physfn(dev); + + rcu_read_lock(); + for_each_drhd_unit(dmaru) { + drhd = container_of(dmaru->hdr, + struct acpi_dmar_hardware_unit, + header); + + if (dmaru->include_all && + drhd->segment == pci_domain_nr(dev->bus)) + goto out; + + if (dmar_pci_device_match(dmaru->devices, + dmaru->devices_cnt, dev)) + goto out; + } + dmaru = NULL; +out: + rcu_read_unlock(); + + return dmaru; +} + +static void __init dmar_acpi_insert_dev_scope(u8 device_number, + struct acpi_device *adev) +{ + struct dmar_drhd_unit *dmaru; + struct acpi_dmar_hardware_unit *drhd; + struct acpi_dmar_device_scope *scope; + struct device *tmp; + int i; + struct acpi_dmar_pci_path *path; + + for_each_drhd_unit(dmaru) { + drhd = container_of(dmaru->hdr, + struct acpi_dmar_hardware_unit, + header); + + for (scope = (void *)(drhd + 1); + (unsigned long)scope < ((unsigned long)drhd) + drhd->header.length; + scope = ((void *)scope) + scope->length) { + if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_NAMESPACE) + continue; + if (scope->enumeration_id != device_number) + continue; + + path = (void *)(scope + 1); + pr_info("ACPI device \"%s\" under DMAR at %llx as %02x:%02x.%d\n", + dev_name(&adev->dev), dmaru->reg_base_addr, + scope->bus, path->device, path->function); + for_each_dev_scope(dmaru->devices, dmaru->devices_cnt, i, tmp) + if (tmp == NULL) { + dmaru->devices[i].bus = scope->bus; + dmaru->devices[i].devfn = PCI_DEVFN(path->device, + path->function); + rcu_assign_pointer(dmaru->devices[i].dev, + get_device(&adev->dev)); + return; + } + BUG_ON(i >= dmaru->devices_cnt); + } + } + pr_warn("No IOMMU scope found for ANDD enumeration ID %d (%s)\n", + device_number, dev_name(&adev->dev)); +} + +static int __init dmar_acpi_dev_scope_init(void) +{ + struct acpi_dmar_andd *andd; + + if (dmar_tbl == NULL) + return -ENODEV; + + for (andd = (void *)dmar_tbl + sizeof(struct acpi_table_dmar); + ((unsigned long)andd) < ((unsigned long)dmar_tbl) + dmar_tbl->length; + andd = ((void *)andd) + andd->header.length) { + if (andd->header.type == ACPI_DMAR_TYPE_NAMESPACE) { + acpi_handle h; + struct acpi_device *adev; + + if (!ACPI_SUCCESS(acpi_get_handle(ACPI_ROOT_OBJECT, + andd->device_name, + &h))) { + pr_err("Failed to find handle for ACPI object %s\n", + andd->device_name); + continue; + } + if (acpi_bus_get_device(h, &adev)) { + pr_err("Failed to get device for ACPI object %s\n", + andd->device_name); + continue; + } + dmar_acpi_insert_dev_scope(andd->device_number, adev); + } + } + return 0; +} + +int __init dmar_dev_scope_init(void) +{ + struct pci_dev *dev = NULL; + struct dmar_pci_notify_info *info; + + if (dmar_dev_scope_status != 1) + return dmar_dev_scope_status; + + if (list_empty(&dmar_drhd_units)) { + dmar_dev_scope_status = -ENODEV; + } else { + dmar_dev_scope_status = 0; + + dmar_acpi_dev_scope_init(); + + for_each_pci_dev(dev) { + if (dev->is_virtfn) + continue; + + info = dmar_alloc_pci_notify_info(dev, + BUS_NOTIFY_ADD_DEVICE); + if (!info) { + return dmar_dev_scope_status; + } else { + dmar_pci_bus_add_dev(info); + dmar_free_pci_notify_info(info); + } + } + + bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb); + } + + return dmar_dev_scope_status; +} + + +int __init dmar_table_init(void) +{ + static int dmar_table_initialized; + int ret; + + if (dmar_table_initialized == 0) { + ret = parse_dmar_table(); + if (ret < 0) { + if (ret != -ENODEV) + pr_info("parse DMAR table failure.\n"); + } else if (list_empty(&dmar_drhd_units)) { + pr_info("No DMAR devices found\n"); + ret = -ENODEV; + } + + if (ret < 0) + dmar_table_initialized = ret; + else + dmar_table_initialized = 1; + } + + return dmar_table_initialized < 0 ? dmar_table_initialized : 0; +} + +static void warn_invalid_dmar(u64 addr, const char *message) +{ + WARN_TAINT_ONCE( + 1, TAINT_FIRMWARE_WORKAROUND, + "Your BIOS is broken; DMAR reported at address %llx%s!\n" + "BIOS vendor: %s; Ver: %s; Product Version: %s\n", + addr, message, + dmi_get_system_info(DMI_BIOS_VENDOR), + dmi_get_system_info(DMI_BIOS_VERSION), + dmi_get_system_info(DMI_PRODUCT_VERSION)); +} + +static int __ref +dmar_validate_one_drhd(struct acpi_dmar_header *entry, void *arg) +{ + struct acpi_dmar_hardware_unit *drhd; + void __iomem *addr; + u64 cap, ecap; + + drhd = (void *)entry; + if (!drhd->address) { + warn_invalid_dmar(0, ""); + return -EINVAL; + } + + if (arg) + addr = ioremap(drhd->address, VTD_PAGE_SIZE); + else + addr = early_ioremap(drhd->address, VTD_PAGE_SIZE); + if (!addr) { + pr_warn("IOMMU: can't validate: %llx\n", drhd->address); + return -EINVAL; + } + + cap = dmar_readq(addr + DMAR_CAP_REG); + ecap = dmar_readq(addr + DMAR_ECAP_REG); + + if (arg) + iounmap(addr); + else + early_iounmap(addr, VTD_PAGE_SIZE); + + if (cap == (uint64_t)-1 && ecap == (uint64_t)-1) { + warn_invalid_dmar(drhd->address, " returns all ones"); + return -EINVAL; + } + + return 0; +} + +int __init detect_intel_iommu(void) +{ + int ret; + struct dmar_res_callback validate_drhd_cb = { + .cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_validate_one_drhd, + .ignore_unhandled = true, + }; + + down_write(&dmar_global_lock); + ret = dmar_table_detect(); + if (ret) + ret = !dmar_walk_dmar_table((struct acpi_table_dmar *)dmar_tbl, + &validate_drhd_cb); + if (ret && !no_iommu && !iommu_detected && !dmar_disabled) { + iommu_detected = 1; + /* Make sure ACS will be enabled */ + pci_request_acs(); + } + +#ifdef CONFIG_X86 + if (ret) + x86_init.iommu.iommu_init = intel_iommu_init; +#endif + + early_acpi_os_unmap_memory((void __iomem *)dmar_tbl, dmar_tbl_size); + dmar_tbl = NULL; + up_write(&dmar_global_lock); + + return ret ? 1 : -ENODEV; +} + + +static void unmap_iommu(struct intel_iommu *iommu) +{ + iounmap(iommu->reg); + release_mem_region(iommu->reg_phys, iommu->reg_size); +} + +/** + * map_iommu: map the iommu's registers + * @iommu: the iommu to map + * @phys_addr: the physical address of the base resgister + * + * Memory map the iommu's registers. Start w/ a single page, and + * possibly expand if that turns out to be insufficent. + */ +static int map_iommu(struct intel_iommu *iommu, u64 phys_addr) +{ + int map_size, err=0; + + iommu->reg_phys = phys_addr; + iommu->reg_size = VTD_PAGE_SIZE; + + if (!request_mem_region(iommu->reg_phys, iommu->reg_size, iommu->name)) { + pr_err("IOMMU: can't reserve memory\n"); + err = -EBUSY; + goto out; + } + + iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size); + if (!iommu->reg) { + pr_err("IOMMU: can't map the region\n"); + err = -ENOMEM; + goto release; + } + + iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG); + iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG); + + if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) { + err = -EINVAL; + warn_invalid_dmar(phys_addr, " returns all ones"); + goto unmap; + } + + /* the registers might be more than one page */ + map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap), + cap_max_fault_reg_offset(iommu->cap)); + map_size = VTD_PAGE_ALIGN(map_size); + if (map_size > iommu->reg_size) { + iounmap(iommu->reg); + release_mem_region(iommu->reg_phys, iommu->reg_size); + iommu->reg_size = map_size; + if (!request_mem_region(iommu->reg_phys, iommu->reg_size, + iommu->name)) { + pr_err("IOMMU: can't reserve memory\n"); + err = -EBUSY; + goto out; + } + iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size); + if (!iommu->reg) { + pr_err("IOMMU: can't map the region\n"); + err = -ENOMEM; + goto release; + } + } + err = 0; + goto out; + +unmap: + iounmap(iommu->reg); +release: + release_mem_region(iommu->reg_phys, iommu->reg_size); +out: + return err; +} + +static int dmar_alloc_seq_id(struct intel_iommu *iommu) +{ + iommu->seq_id = find_first_zero_bit(dmar_seq_ids, + DMAR_UNITS_SUPPORTED); + if (iommu->seq_id >= DMAR_UNITS_SUPPORTED) { + iommu->seq_id = -1; + } else { + set_bit(iommu->seq_id, dmar_seq_ids); + sprintf(iommu->name, "dmar%d", iommu->seq_id); + } + + return iommu->seq_id; +} + +static void dmar_free_seq_id(struct intel_iommu *iommu) +{ + if (iommu->seq_id >= 0) { + clear_bit(iommu->seq_id, dmar_seq_ids); + iommu->seq_id = -1; + } +} + +static int alloc_iommu(struct dmar_drhd_unit *drhd) +{ + struct intel_iommu *iommu; + u32 ver, sts; + int agaw = 0; + int msagaw = 0; + int err; + + if (!drhd->reg_base_addr) { + warn_invalid_dmar(0, ""); + return -EINVAL; + } + + iommu = kzalloc(sizeof(*iommu), GFP_KERNEL); + if (!iommu) + return -ENOMEM; + + if (dmar_alloc_seq_id(iommu) < 0) { + pr_err("IOMMU: failed to allocate seq_id\n"); + err = -ENOSPC; + goto error; + } + + err = map_iommu(iommu, drhd->reg_base_addr); + if (err) { + pr_err("IOMMU: failed to map %s\n", iommu->name); + goto error_free_seq_id; + } + + err = -EINVAL; + agaw = iommu_calculate_agaw(iommu); + if (agaw < 0) { + pr_err("Cannot get a valid agaw for iommu (seq_id = %d)\n", + iommu->seq_id); + goto err_unmap; + } + msagaw = iommu_calculate_max_sagaw(iommu); + if (msagaw < 0) { + pr_err("Cannot get a valid max agaw for iommu (seq_id = %d)\n", + iommu->seq_id); + goto err_unmap; + } + iommu->agaw = agaw; + iommu->msagaw = msagaw; + iommu->segment = drhd->segment; + + iommu->node = -1; + + ver = readl(iommu->reg + DMAR_VER_REG); + pr_info("IOMMU %d: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n", + iommu->seq_id, + (unsigned long long)drhd->reg_base_addr, + DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver), + (unsigned long long)iommu->cap, + (unsigned long long)iommu->ecap); + + /* Reflect status in gcmd */ + sts = readl(iommu->reg + DMAR_GSTS_REG); + if (sts & DMA_GSTS_IRES) + iommu->gcmd |= DMA_GCMD_IRE; + if (sts & DMA_GSTS_TES) + iommu->gcmd |= DMA_GCMD_TE; + if (sts & DMA_GSTS_QIES) + iommu->gcmd |= DMA_GCMD_QIE; + + raw_spin_lock_init(&iommu->register_lock); + + drhd->iommu = iommu; + + if (intel_iommu_enabled) + iommu->iommu_dev = iommu_device_create(NULL, iommu, + intel_iommu_groups, + iommu->name); + + return 0; + +err_unmap: + unmap_iommu(iommu); +error_free_seq_id: + dmar_free_seq_id(iommu); +error: + kfree(iommu); + return err; +} + +static void free_iommu(struct intel_iommu *iommu) +{ + iommu_device_destroy(iommu->iommu_dev); + + if (iommu->irq) { + free_irq(iommu->irq, iommu); + irq_set_handler_data(iommu->irq, NULL); + dmar_free_hwirq(iommu->irq); + } + + if (iommu->qi) { + free_page((unsigned long)iommu->qi->desc); + kfree(iommu->qi->desc_status); + kfree(iommu->qi); + } + + if (iommu->reg) + unmap_iommu(iommu); + + dmar_free_seq_id(iommu); + kfree(iommu); +} + +/* + * Reclaim all the submitted descriptors which have completed its work. + */ +static inline void reclaim_free_desc(struct q_inval *qi) +{ + while (qi->desc_status[qi->free_tail] == QI_DONE || + qi->desc_status[qi->free_tail] == QI_ABORT) { + qi->desc_status[qi->free_tail] = QI_FREE; + qi->free_tail = (qi->free_tail + 1) % QI_LENGTH; + qi->free_cnt++; + } +} + +static int qi_check_fault(struct intel_iommu *iommu, int index) +{ + u32 fault; + int head, tail; + struct q_inval *qi = iommu->qi; + int wait_index = (index + 1) % QI_LENGTH; + + if (qi->desc_status[wait_index] == QI_ABORT) + return -EAGAIN; + + fault = readl(iommu->reg + DMAR_FSTS_REG); + + /* + * If IQE happens, the head points to the descriptor associated + * with the error. No new descriptors are fetched until the IQE + * is cleared. + */ + if (fault & DMA_FSTS_IQE) { + head = readl(iommu->reg + DMAR_IQH_REG); + if ((head >> DMAR_IQ_SHIFT) == index) { + pr_err("VT-d detected invalid descriptor: " + "low=%llx, high=%llx\n", + (unsigned long long)qi->desc[index].low, + (unsigned long long)qi->desc[index].high); + memcpy(&qi->desc[index], &qi->desc[wait_index], + sizeof(struct qi_desc)); + __iommu_flush_cache(iommu, &qi->desc[index], + sizeof(struct qi_desc)); + writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG); + return -EINVAL; + } + } + + /* + * If ITE happens, all pending wait_desc commands are aborted. + * No new descriptors are fetched until the ITE is cleared. + */ + if (fault & DMA_FSTS_ITE) { + head = readl(iommu->reg + DMAR_IQH_REG); + head = ((head >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH; + head |= 1; + tail = readl(iommu->reg + DMAR_IQT_REG); + tail = ((tail >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH; + + writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG); + + do { + if (qi->desc_status[head] == QI_IN_USE) + qi->desc_status[head] = QI_ABORT; + head = (head - 2 + QI_LENGTH) % QI_LENGTH; + } while (head != tail); + + if (qi->desc_status[wait_index] == QI_ABORT) + return -EAGAIN; + } + + if (fault & DMA_FSTS_ICE) + writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG); + + return 0; +} + +/* + * Submit the queued invalidation descriptor to the remapping + * hardware unit and wait for its completion. + */ +int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu) +{ + int rc; + struct q_inval *qi = iommu->qi; + struct qi_desc *hw, wait_desc; + int wait_index, index; + unsigned long flags; + + if (!qi) + return 0; + + hw = qi->desc; + +restart: + rc = 0; + + raw_spin_lock_irqsave(&qi->q_lock, flags); + while (qi->free_cnt < 3) { + raw_spin_unlock_irqrestore(&qi->q_lock, flags); + cpu_relax(); + raw_spin_lock_irqsave(&qi->q_lock, flags); + } + + index = qi->free_head; + wait_index = (index + 1) % QI_LENGTH; + + qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE; + + hw[index] = *desc; + + wait_desc.low = QI_IWD_STATUS_DATA(QI_DONE) | + QI_IWD_STATUS_WRITE | QI_IWD_TYPE; + wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]); + + hw[wait_index] = wait_desc; + + __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc)); + __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc)); + + qi->free_head = (qi->free_head + 2) % QI_LENGTH; + qi->free_cnt -= 2; + + /* + * update the HW tail register indicating the presence of + * new descriptors. + */ + writel(qi->free_head << DMAR_IQ_SHIFT, iommu->reg + DMAR_IQT_REG); + + while (qi->desc_status[wait_index] != QI_DONE) { + /* + * We will leave the interrupts disabled, to prevent interrupt + * context to queue another cmd while a cmd is already submitted + * and waiting for completion on this cpu. This is to avoid + * a deadlock where the interrupt context can wait indefinitely + * for free slots in the queue. + */ + rc = qi_check_fault(iommu, index); + if (rc) + break; + + raw_spin_unlock(&qi->q_lock); + cpu_relax(); + raw_spin_lock(&qi->q_lock); + } + + qi->desc_status[index] = QI_DONE; + + reclaim_free_desc(qi); + raw_spin_unlock_irqrestore(&qi->q_lock, flags); + + if (rc == -EAGAIN) + goto restart; + + return rc; +} + +/* + * Flush the global interrupt entry cache. + */ +void qi_global_iec(struct intel_iommu *iommu) +{ + struct qi_desc desc; + + desc.low = QI_IEC_TYPE; + desc.high = 0; + + /* should never fail */ + qi_submit_sync(&desc, iommu); +} + +void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm, + u64 type) +{ + struct qi_desc desc; + + desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did) + | QI_CC_GRAN(type) | QI_CC_TYPE; + desc.high = 0; + + qi_submit_sync(&desc, iommu); +} + +void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, + unsigned int size_order, u64 type) +{ + u8 dw = 0, dr = 0; + + struct qi_desc desc; + int ih = 0; + + if (cap_write_drain(iommu->cap)) + dw = 1; + + if (cap_read_drain(iommu->cap)) + dr = 1; + + desc.low = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw) + | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE; + desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih) + | QI_IOTLB_AM(size_order); + + qi_submit_sync(&desc, iommu); +} + +void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep, + u64 addr, unsigned mask) +{ + struct qi_desc desc; + + if (mask) { + BUG_ON(addr & ((1 << (VTD_PAGE_SHIFT + mask)) - 1)); + addr |= (1 << (VTD_PAGE_SHIFT + mask - 1)) - 1; + desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE; + } else + desc.high = QI_DEV_IOTLB_ADDR(addr); + + if (qdep >= QI_DEV_IOTLB_MAX_INVS) + qdep = 0; + + desc.low = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) | + QI_DIOTLB_TYPE; + + qi_submit_sync(&desc, iommu); +} + +/* + * Disable Queued Invalidation interface. + */ +void dmar_disable_qi(struct intel_iommu *iommu) +{ + unsigned long flags; + u32 sts; + cycles_t start_time = get_cycles(); + + if (!ecap_qis(iommu->ecap)) + return; + + raw_spin_lock_irqsave(&iommu->register_lock, flags); + + sts = dmar_readq(iommu->reg + DMAR_GSTS_REG); + if (!(sts & DMA_GSTS_QIES)) + goto end; + + /* + * Give a chance to HW to complete the pending invalidation requests. + */ + while ((readl(iommu->reg + DMAR_IQT_REG) != + readl(iommu->reg + DMAR_IQH_REG)) && + (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time))) + cpu_relax(); + + iommu->gcmd &= ~DMA_GCMD_QIE; + writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); + + IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, + !(sts & DMA_GSTS_QIES), sts); +end: + raw_spin_unlock_irqrestore(&iommu->register_lock, flags); +} + +/* + * Enable queued invalidation. + */ +static void __dmar_enable_qi(struct intel_iommu *iommu) +{ + u32 sts; + unsigned long flags; + struct q_inval *qi = iommu->qi; + + qi->free_head = qi->free_tail = 0; + qi->free_cnt = QI_LENGTH; + + raw_spin_lock_irqsave(&iommu->register_lock, flags); + + /* write zero to the tail reg */ + writel(0, iommu->reg + DMAR_IQT_REG); + + dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc)); + + iommu->gcmd |= DMA_GCMD_QIE; + writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); + + /* Make sure hardware complete it */ + IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts); + + raw_spin_unlock_irqrestore(&iommu->register_lock, flags); +} + +/* + * Enable Queued Invalidation interface. This is a must to support + * interrupt-remapping. Also used by DMA-remapping, which replaces + * register based IOTLB invalidation. + */ +int dmar_enable_qi(struct intel_iommu *iommu) +{ + struct q_inval *qi; + struct page *desc_page; + + if (!ecap_qis(iommu->ecap)) + return -ENOENT; + + /* + * queued invalidation is already setup and enabled. + */ + if (iommu->qi) + return 0; + + iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC); + if (!iommu->qi) + return -ENOMEM; + + qi = iommu->qi; + + + desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO, 0); + if (!desc_page) { + kfree(qi); + iommu->qi = NULL; + return -ENOMEM; + } + + qi->desc = page_address(desc_page); + + qi->desc_status = kzalloc(QI_LENGTH * sizeof(int), GFP_ATOMIC); + if (!qi->desc_status) { + free_page((unsigned long) qi->desc); + kfree(qi); + iommu->qi = NULL; + return -ENOMEM; + } + + raw_spin_lock_init(&qi->q_lock); + + __dmar_enable_qi(iommu); + + return 0; +} + +/* iommu interrupt handling. Most stuff are MSI-like. */ + +enum faulttype { + DMA_REMAP, + INTR_REMAP, + UNKNOWN, +}; + +static const char *dma_remap_fault_reasons[] = +{ + "Software", + "Present bit in root entry is clear", + "Present bit in context entry is clear", + "Invalid context entry", + "Access beyond MGAW", + "PTE Write access is not set", + "PTE Read access is not set", + "Next page table ptr is invalid", + "Root table address invalid", + "Context table ptr is invalid", + "non-zero reserved fields in RTP", + "non-zero reserved fields in CTP", + "non-zero reserved fields in PTE", + "PCE for translation request specifies blocking", +}; + +static const char *irq_remap_fault_reasons[] = +{ + "Detected reserved fields in the decoded interrupt-remapped request", + "Interrupt index exceeded the interrupt-remapping table size", + "Present field in the IRTE entry is clear", + "Error accessing interrupt-remapping table pointed by IRTA_REG", + "Detected reserved fields in the IRTE entry", + "Blocked a compatibility format interrupt request", + "Blocked an interrupt request due to source-id verification failure", +}; + +static const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type) +{ + if (fault_reason >= 0x20 && (fault_reason - 0x20 < + ARRAY_SIZE(irq_remap_fault_reasons))) { + *fault_type = INTR_REMAP; + return irq_remap_fault_reasons[fault_reason - 0x20]; + } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) { + *fault_type = DMA_REMAP; + return dma_remap_fault_reasons[fault_reason]; + } else { + *fault_type = UNKNOWN; + return "Unknown"; + } +} + +void dmar_msi_unmask(struct irq_data *data) +{ + struct intel_iommu *iommu = irq_data_get_irq_handler_data(data); + unsigned long flag; + + /* unmask it */ + raw_spin_lock_irqsave(&iommu->register_lock, flag); + writel(0, iommu->reg + DMAR_FECTL_REG); + /* Read a reg to force flush the post write */ + readl(iommu->reg + DMAR_FECTL_REG); + raw_spin_unlock_irqrestore(&iommu->register_lock, flag); +} + +void dmar_msi_mask(struct irq_data *data) +{ + unsigned long flag; + struct intel_iommu *iommu = irq_data_get_irq_handler_data(data); + + /* mask it */ + raw_spin_lock_irqsave(&iommu->register_lock, flag); + writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG); + /* Read a reg to force flush the post write */ + readl(iommu->reg + DMAR_FECTL_REG); + raw_spin_unlock_irqrestore(&iommu->register_lock, flag); +} + +void dmar_msi_write(int irq, struct msi_msg *msg) +{ + struct intel_iommu *iommu = irq_get_handler_data(irq); + unsigned long flag; + + raw_spin_lock_irqsave(&iommu->register_lock, flag); + writel(msg->data, iommu->reg + DMAR_FEDATA_REG); + writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG); + writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG); + raw_spin_unlock_irqrestore(&iommu->register_lock, flag); +} + +void dmar_msi_read(int irq, struct msi_msg *msg) +{ + struct intel_iommu *iommu = irq_get_handler_data(irq); + unsigned long flag; + + raw_spin_lock_irqsave(&iommu->register_lock, flag); + msg->data = readl(iommu->reg + DMAR_FEDATA_REG); + msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG); + msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG); + raw_spin_unlock_irqrestore(&iommu->register_lock, flag); +} + +static int dmar_fault_do_one(struct intel_iommu *iommu, int type, + u8 fault_reason, u16 source_id, unsigned long long addr) +{ + const char *reason; + int fault_type; + + reason = dmar_get_fault_reason(fault_reason, &fault_type); + + if (fault_type == INTR_REMAP) + pr_err("INTR-REMAP: Request device [[%02x:%02x.%d] " + "fault index %llx\n" + "INTR-REMAP:[fault reason %02d] %s\n", + (source_id >> 8), PCI_SLOT(source_id & 0xFF), + PCI_FUNC(source_id & 0xFF), addr >> 48, + fault_reason, reason); + else + pr_err("DMAR:[%s] Request device [%02x:%02x.%d] " + "fault addr %llx \n" + "DMAR:[fault reason %02d] %s\n", + (type ? "DMA Read" : "DMA Write"), + (source_id >> 8), PCI_SLOT(source_id & 0xFF), + PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason); + return 0; +} + +#define PRIMARY_FAULT_REG_LEN (16) +irqreturn_t dmar_fault(int irq, void *dev_id) +{ + struct intel_iommu *iommu = dev_id; + int reg, fault_index; + u32 fault_status; + unsigned long flag; + + raw_spin_lock_irqsave(&iommu->register_lock, flag); + fault_status = readl(iommu->reg + DMAR_FSTS_REG); + if (fault_status) + pr_err("DRHD: handling fault status reg %x\n", fault_status); + + /* TBD: ignore advanced fault log currently */ + if (!(fault_status & DMA_FSTS_PPF)) + goto unlock_exit; + + fault_index = dma_fsts_fault_record_index(fault_status); + reg = cap_fault_reg_offset(iommu->cap); + while (1) { + u8 fault_reason; + u16 source_id; + u64 guest_addr; + int type; + u32 data; + + /* highest 32 bits */ + data = readl(iommu->reg + reg + + fault_index * PRIMARY_FAULT_REG_LEN + 12); + if (!(data & DMA_FRCD_F)) + break; + + fault_reason = dma_frcd_fault_reason(data); + type = dma_frcd_type(data); + + data = readl(iommu->reg + reg + + fault_index * PRIMARY_FAULT_REG_LEN + 8); + source_id = dma_frcd_source_id(data); + + guest_addr = dmar_readq(iommu->reg + reg + + fault_index * PRIMARY_FAULT_REG_LEN); + guest_addr = dma_frcd_page_addr(guest_addr); + /* clear the fault */ + writel(DMA_FRCD_F, iommu->reg + reg + + fault_index * PRIMARY_FAULT_REG_LEN + 12); + + raw_spin_unlock_irqrestore(&iommu->register_lock, flag); + + dmar_fault_do_one(iommu, type, fault_reason, + source_id, guest_addr); + + fault_index++; + if (fault_index >= cap_num_fault_regs(iommu->cap)) + fault_index = 0; + raw_spin_lock_irqsave(&iommu->register_lock, flag); + } + + writel(DMA_FSTS_PFO | DMA_FSTS_PPF, iommu->reg + DMAR_FSTS_REG); + +unlock_exit: + raw_spin_unlock_irqrestore(&iommu->register_lock, flag); + return IRQ_HANDLED; +} + +int dmar_set_interrupt(struct intel_iommu *iommu) +{ + int irq, ret; + + /* + * Check if the fault interrupt is already initialized. + */ + if (iommu->irq) + return 0; + + irq = dmar_alloc_hwirq(); + if (irq <= 0) { + pr_err("IOMMU: no free vectors\n"); + return -EINVAL; + } + + irq_set_handler_data(irq, iommu); + iommu->irq = irq; + + ret = arch_setup_dmar_msi(irq); + if (ret) { + irq_set_handler_data(irq, NULL); + iommu->irq = 0; + dmar_free_hwirq(irq); + return ret; + } + + ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu); + if (ret) + pr_err("IOMMU: can't request irq\n"); + return ret; +} + +int __init enable_drhd_fault_handling(void) +{ + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + + /* + * Enable fault control interrupt. + */ + for_each_iommu(iommu, drhd) { + u32 fault_status; + int ret = dmar_set_interrupt(iommu); + + if (ret) { + pr_err("DRHD %Lx: failed to enable fault, interrupt, ret %d\n", + (unsigned long long)drhd->reg_base_addr, ret); + return -1; + } + + /* + * Clear any previous faults. + */ + dmar_fault(iommu->irq, iommu); + fault_status = readl(iommu->reg + DMAR_FSTS_REG); + writel(fault_status, iommu->reg + DMAR_FSTS_REG); + } + + return 0; +} + +/* + * Re-enable Queued Invalidation interface. + */ +int dmar_reenable_qi(struct intel_iommu *iommu) +{ + if (!ecap_qis(iommu->ecap)) + return -ENOENT; + + if (!iommu->qi) + return -ENOENT; + + /* + * First disable queued invalidation. + */ + dmar_disable_qi(iommu); + /* + * Then enable queued invalidation again. Since there is no pending + * invalidation requests now, it's safe to re-enable queued + * invalidation. + */ + __dmar_enable_qi(iommu); + + return 0; +} + +/* + * Check interrupt remapping support in DMAR table description. + */ +int __init dmar_ir_support(void) +{ + struct acpi_table_dmar *dmar; + dmar = (struct acpi_table_dmar *)dmar_tbl; + if (!dmar) + return 0; + return dmar->flags & 0x1; +} + +/* Check whether DMAR units are in use */ +static inline bool dmar_in_use(void) +{ + return irq_remapping_enabled || intel_iommu_enabled; +} + +static int __init dmar_free_unused_resources(void) +{ + struct dmar_drhd_unit *dmaru, *dmaru_n; + + if (dmar_in_use()) + return 0; + + if (dmar_dev_scope_status != 1 && !list_empty(&dmar_drhd_units)) + bus_unregister_notifier(&pci_bus_type, &dmar_pci_bus_nb); + + down_write(&dmar_global_lock); + list_for_each_entry_safe(dmaru, dmaru_n, &dmar_drhd_units, list) { + list_del(&dmaru->list); + dmar_free_drhd(dmaru); + } + up_write(&dmar_global_lock); + + return 0; +} + +late_initcall(dmar_free_unused_resources); +IOMMU_INIT_POST(detect_intel_iommu); + +/* + * DMAR Hotplug Support + * For more details, please refer to Intel(R) Virtualization Technology + * for Directed-IO Architecture Specifiction, Rev 2.2, Section 8.8 + * "Remapping Hardware Unit Hot Plug". + */ +static u8 dmar_hp_uuid[] = { + /* 0000 */ 0xA6, 0xA3, 0xC1, 0xD8, 0x9B, 0xBE, 0x9B, 0x4C, + /* 0008 */ 0x91, 0xBF, 0xC3, 0xCB, 0x81, 0xFC, 0x5D, 0xAF +}; + +/* + * Currently there's only one revision and BIOS will not check the revision id, + * so use 0 for safety. + */ +#define DMAR_DSM_REV_ID 0 +#define DMAR_DSM_FUNC_DRHD 1 +#define DMAR_DSM_FUNC_ATSR 2 +#define DMAR_DSM_FUNC_RHSA 3 + +static inline bool dmar_detect_dsm(acpi_handle handle, int func) +{ + return acpi_check_dsm(handle, dmar_hp_uuid, DMAR_DSM_REV_ID, 1 << func); +} + +static int dmar_walk_dsm_resource(acpi_handle handle, int func, + dmar_res_handler_t handler, void *arg) +{ + int ret = -ENODEV; + union acpi_object *obj; + struct acpi_dmar_header *start; + struct dmar_res_callback callback; + static int res_type[] = { + [DMAR_DSM_FUNC_DRHD] = ACPI_DMAR_TYPE_HARDWARE_UNIT, + [DMAR_DSM_FUNC_ATSR] = ACPI_DMAR_TYPE_ROOT_ATS, + [DMAR_DSM_FUNC_RHSA] = ACPI_DMAR_TYPE_HARDWARE_AFFINITY, + }; + + if (!dmar_detect_dsm(handle, func)) + return 0; + + obj = acpi_evaluate_dsm_typed(handle, dmar_hp_uuid, DMAR_DSM_REV_ID, + func, NULL, ACPI_TYPE_BUFFER); + if (!obj) + return -ENODEV; + + memset(&callback, 0, sizeof(callback)); + callback.cb[res_type[func]] = handler; + callback.arg[res_type[func]] = arg; + start = (struct acpi_dmar_header *)obj->buffer.pointer; + ret = dmar_walk_remapping_entries(start, obj->buffer.length, &callback); + + ACPI_FREE(obj); + + return ret; +} + +static int dmar_hp_add_drhd(struct acpi_dmar_header *header, void *arg) +{ + int ret; + struct dmar_drhd_unit *dmaru; + + dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header); + if (!dmaru) + return -ENODEV; + + ret = dmar_ir_hotplug(dmaru, true); + if (ret == 0) + ret = dmar_iommu_hotplug(dmaru, true); + + return ret; +} + +static int dmar_hp_remove_drhd(struct acpi_dmar_header *header, void *arg) +{ + int i, ret; + struct device *dev; + struct dmar_drhd_unit *dmaru; + + dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header); + if (!dmaru) + return 0; + + /* + * All PCI devices managed by this unit should have been destroyed. + */ + if (!dmaru->include_all && dmaru->devices && dmaru->devices_cnt) + for_each_active_dev_scope(dmaru->devices, + dmaru->devices_cnt, i, dev) + return -EBUSY; + + ret = dmar_ir_hotplug(dmaru, false); + if (ret == 0) + ret = dmar_iommu_hotplug(dmaru, false); + + return ret; +} + +static int dmar_hp_release_drhd(struct acpi_dmar_header *header, void *arg) +{ + struct dmar_drhd_unit *dmaru; + + dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header); + if (dmaru) { + list_del_rcu(&dmaru->list); + synchronize_rcu(); + dmar_free_drhd(dmaru); + } + + return 0; +} + +static int dmar_hotplug_insert(acpi_handle handle) +{ + int ret; + int drhd_count = 0; + + ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD, + &dmar_validate_one_drhd, (void *)1); + if (ret) + goto out; + + ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD, + &dmar_parse_one_drhd, (void *)&drhd_count); + if (ret == 0 && drhd_count == 0) { + pr_warn(FW_BUG "No DRHD structures in buffer returned by _DSM method\n"); + goto out; + } else if (ret) { + goto release_drhd; + } + + ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_RHSA, + &dmar_parse_one_rhsa, NULL); + if (ret) + goto release_drhd; + + ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR, + &dmar_parse_one_atsr, NULL); + if (ret) + goto release_atsr; + + ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD, + &dmar_hp_add_drhd, NULL); + if (!ret) + return 0; + + dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD, + &dmar_hp_remove_drhd, NULL); +release_atsr: + dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR, + &dmar_release_one_atsr, NULL); +release_drhd: + dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD, + &dmar_hp_release_drhd, NULL); +out: + return ret; +} + +static int dmar_hotplug_remove(acpi_handle handle) +{ + int ret; + + ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR, + &dmar_check_one_atsr, NULL); + if (ret) + return ret; + + ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD, + &dmar_hp_remove_drhd, NULL); + if (ret == 0) { + WARN_ON(dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR, + &dmar_release_one_atsr, NULL)); + WARN_ON(dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD, + &dmar_hp_release_drhd, NULL)); + } else { + dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD, + &dmar_hp_add_drhd, NULL); + } + + return ret; +} + +static acpi_status dmar_get_dsm_handle(acpi_handle handle, u32 lvl, + void *context, void **retval) +{ + acpi_handle *phdl = retval; + + if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) { + *phdl = handle; + return AE_CTRL_TERMINATE; + } + + return AE_OK; +} + +static int dmar_device_hotplug(acpi_handle handle, bool insert) +{ + int ret; + acpi_handle tmp = NULL; + acpi_status status; + + if (!dmar_in_use()) + return 0; + + if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) { + tmp = handle; + } else { + status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, + ACPI_UINT32_MAX, + dmar_get_dsm_handle, + NULL, NULL, &tmp); + if (ACPI_FAILURE(status)) { + pr_warn("Failed to locate _DSM method.\n"); + return -ENXIO; + } + } + if (tmp == NULL) + return 0; + + down_write(&dmar_global_lock); + if (insert) + ret = dmar_hotplug_insert(tmp); + else + ret = dmar_hotplug_remove(tmp); + up_write(&dmar_global_lock); + + return ret; +} + +int dmar_device_add(acpi_handle handle) +{ + return dmar_device_hotplug(handle, true); +} + +int dmar_device_remove(acpi_handle handle) +{ + return dmar_device_hotplug(handle, false); +} diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c new file mode 100644 index 000000000..3e898504a --- /dev/null +++ b/drivers/iommu/exynos-iommu.c @@ -0,0 +1,1242 @@ +/* linux/drivers/iommu/exynos_iommu.c + * + * Copyright (c) 2011 Samsung Electronics Co., Ltd. + * http://www.samsung.com + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifdef CONFIG_EXYNOS_IOMMU_DEBUG +#define DEBUG +#endif + +#include <linux/io.h> +#include <linux/interrupt.h> +#include <linux/platform_device.h> +#include <linux/slab.h> +#include <linux/pm_runtime.h> +#include <linux/clk.h> +#include <linux/err.h> +#include <linux/mm.h> +#include <linux/iommu.h> +#include <linux/errno.h> +#include <linux/list.h> +#include <linux/memblock.h> +#include <linux/export.h> + +#include <asm/cacheflush.h> +#include <asm/pgtable.h> + +typedef u32 sysmmu_iova_t; +typedef u32 sysmmu_pte_t; + +/* We do not consider super section mapping (16MB) */ +#define SECT_ORDER 20 +#define LPAGE_ORDER 16 +#define SPAGE_ORDER 12 + +#define SECT_SIZE (1 << SECT_ORDER) +#define LPAGE_SIZE (1 << LPAGE_ORDER) +#define SPAGE_SIZE (1 << SPAGE_ORDER) + +#define SECT_MASK (~(SECT_SIZE - 1)) +#define LPAGE_MASK (~(LPAGE_SIZE - 1)) +#define SPAGE_MASK (~(SPAGE_SIZE - 1)) + +#define lv1ent_fault(sent) ((*(sent) == ZERO_LV2LINK) || \ + ((*(sent) & 3) == 0) || ((*(sent) & 3) == 3)) +#define lv1ent_zero(sent) (*(sent) == ZERO_LV2LINK) +#define lv1ent_page_zero(sent) ((*(sent) & 3) == 1) +#define lv1ent_page(sent) ((*(sent) != ZERO_LV2LINK) && \ + ((*(sent) & 3) == 1)) +#define lv1ent_section(sent) ((*(sent) & 3) == 2) + +#define lv2ent_fault(pent) ((*(pent) & 3) == 0) +#define lv2ent_small(pent) ((*(pent) & 2) == 2) +#define lv2ent_large(pent) ((*(pent) & 3) == 1) + +static u32 sysmmu_page_offset(sysmmu_iova_t iova, u32 size) +{ + return iova & (size - 1); +} + +#define section_phys(sent) (*(sent) & SECT_MASK) +#define section_offs(iova) sysmmu_page_offset((iova), SECT_SIZE) +#define lpage_phys(pent) (*(pent) & LPAGE_MASK) +#define lpage_offs(iova) sysmmu_page_offset((iova), LPAGE_SIZE) +#define spage_phys(pent) (*(pent) & SPAGE_MASK) +#define spage_offs(iova) sysmmu_page_offset((iova), SPAGE_SIZE) + +#define NUM_LV1ENTRIES 4096 +#define NUM_LV2ENTRIES (SECT_SIZE / SPAGE_SIZE) + +static u32 lv1ent_offset(sysmmu_iova_t iova) +{ + return iova >> SECT_ORDER; +} + +static u32 lv2ent_offset(sysmmu_iova_t iova) +{ + return (iova >> SPAGE_ORDER) & (NUM_LV2ENTRIES - 1); +} + +#define LV2TABLE_SIZE (NUM_LV2ENTRIES * sizeof(sysmmu_pte_t)) + +#define SPAGES_PER_LPAGE (LPAGE_SIZE / SPAGE_SIZE) + +#define lv2table_base(sent) (*(sent) & 0xFFFFFC00) + +#define mk_lv1ent_sect(pa) ((pa) | 2) +#define mk_lv1ent_page(pa) ((pa) | 1) +#define mk_lv2ent_lpage(pa) ((pa) | 1) +#define mk_lv2ent_spage(pa) ((pa) | 2) + +#define CTRL_ENABLE 0x5 +#define CTRL_BLOCK 0x7 +#define CTRL_DISABLE 0x0 + +#define CFG_LRU 0x1 +#define CFG_QOS(n) ((n & 0xF) << 7) +#define CFG_MASK 0x0150FFFF /* Selecting bit 0-15, 20, 22 and 24 */ +#define CFG_ACGEN (1 << 24) /* System MMU 3.3 only */ +#define CFG_SYSSEL (1 << 22) /* System MMU 3.2 only */ +#define CFG_FLPDCACHE (1 << 20) /* System MMU 3.2+ only */ + +#define REG_MMU_CTRL 0x000 +#define REG_MMU_CFG 0x004 +#define REG_MMU_STATUS 0x008 +#define REG_MMU_FLUSH 0x00C +#define REG_MMU_FLUSH_ENTRY 0x010 +#define REG_PT_BASE_ADDR 0x014 +#define REG_INT_STATUS 0x018 +#define REG_INT_CLEAR 0x01C + +#define REG_PAGE_FAULT_ADDR 0x024 +#define REG_AW_FAULT_ADDR 0x028 +#define REG_AR_FAULT_ADDR 0x02C +#define REG_DEFAULT_SLAVE_ADDR 0x030 + +#define REG_MMU_VERSION 0x034 + +#define MMU_MAJ_VER(val) ((val) >> 7) +#define MMU_MIN_VER(val) ((val) & 0x7F) +#define MMU_RAW_VER(reg) (((reg) >> 21) & ((1 << 11) - 1)) /* 11 bits */ + +#define MAKE_MMU_VER(maj, min) ((((maj) & 0xF) << 7) | ((min) & 0x7F)) + +#define REG_PB0_SADDR 0x04C +#define REG_PB0_EADDR 0x050 +#define REG_PB1_SADDR 0x054 +#define REG_PB1_EADDR 0x058 + +#define has_sysmmu(dev) (dev->archdata.iommu != NULL) + +static struct kmem_cache *lv2table_kmem_cache; +static sysmmu_pte_t *zero_lv2_table; +#define ZERO_LV2LINK mk_lv1ent_page(virt_to_phys(zero_lv2_table)) + +static sysmmu_pte_t *section_entry(sysmmu_pte_t *pgtable, sysmmu_iova_t iova) +{ + return pgtable + lv1ent_offset(iova); +} + +static sysmmu_pte_t *page_entry(sysmmu_pte_t *sent, sysmmu_iova_t iova) +{ + return (sysmmu_pte_t *)phys_to_virt( + lv2table_base(sent)) + lv2ent_offset(iova); +} + +enum exynos_sysmmu_inttype { + SYSMMU_PAGEFAULT, + SYSMMU_AR_MULTIHIT, + SYSMMU_AW_MULTIHIT, + SYSMMU_BUSERROR, + SYSMMU_AR_SECURITY, + SYSMMU_AR_ACCESS, + SYSMMU_AW_SECURITY, + SYSMMU_AW_PROTECTION, /* 7 */ + SYSMMU_FAULT_UNKNOWN, + SYSMMU_FAULTS_NUM +}; + +static unsigned short fault_reg_offset[SYSMMU_FAULTS_NUM] = { + REG_PAGE_FAULT_ADDR, + REG_AR_FAULT_ADDR, + REG_AW_FAULT_ADDR, + REG_DEFAULT_SLAVE_ADDR, + REG_AR_FAULT_ADDR, + REG_AR_FAULT_ADDR, + REG_AW_FAULT_ADDR, + REG_AW_FAULT_ADDR +}; + +static char *sysmmu_fault_name[SYSMMU_FAULTS_NUM] = { + "PAGE FAULT", + "AR MULTI-HIT FAULT", + "AW MULTI-HIT FAULT", + "BUS ERROR", + "AR SECURITY PROTECTION FAULT", + "AR ACCESS PROTECTION FAULT", + "AW SECURITY PROTECTION FAULT", + "AW ACCESS PROTECTION FAULT", + "UNKNOWN FAULT" +}; + +/* attached to dev.archdata.iommu of the master device */ +struct exynos_iommu_owner { + struct list_head client; /* entry of exynos_iommu_domain.clients */ + struct device *dev; + struct device *sysmmu; + struct iommu_domain *domain; + void *vmm_data; /* IO virtual memory manager's data */ + spinlock_t lock; /* Lock to preserve consistency of System MMU */ +}; + +struct exynos_iommu_domain { + struct list_head clients; /* list of sysmmu_drvdata.node */ + sysmmu_pte_t *pgtable; /* lv1 page table, 16KB */ + short *lv2entcnt; /* free lv2 entry counter for each section */ + spinlock_t lock; /* lock for this structure */ + spinlock_t pgtablelock; /* lock for modifying page table @ pgtable */ + struct iommu_domain domain; /* generic domain data structure */ +}; + +struct sysmmu_drvdata { + struct device *sysmmu; /* System MMU's device descriptor */ + struct device *master; /* Owner of system MMU */ + void __iomem *sfrbase; + struct clk *clk; + struct clk *clk_master; + int activations; + spinlock_t lock; + struct iommu_domain *domain; + phys_addr_t pgtable; +}; + +static struct exynos_iommu_domain *to_exynos_domain(struct iommu_domain *dom) +{ + return container_of(dom, struct exynos_iommu_domain, domain); +} + +static bool set_sysmmu_active(struct sysmmu_drvdata *data) +{ + /* return true if the System MMU was not active previously + and it needs to be initialized */ + return ++data->activations == 1; +} + +static bool set_sysmmu_inactive(struct sysmmu_drvdata *data) +{ + /* return true if the System MMU is needed to be disabled */ + BUG_ON(data->activations < 1); + return --data->activations == 0; +} + +static bool is_sysmmu_active(struct sysmmu_drvdata *data) +{ + return data->activations > 0; +} + +static void sysmmu_unblock(void __iomem *sfrbase) +{ + __raw_writel(CTRL_ENABLE, sfrbase + REG_MMU_CTRL); +} + +static unsigned int __raw_sysmmu_version(struct sysmmu_drvdata *data) +{ + return MMU_RAW_VER(__raw_readl(data->sfrbase + REG_MMU_VERSION)); +} + +static bool sysmmu_block(void __iomem *sfrbase) +{ + int i = 120; + + __raw_writel(CTRL_BLOCK, sfrbase + REG_MMU_CTRL); + while ((i > 0) && !(__raw_readl(sfrbase + REG_MMU_STATUS) & 1)) + --i; + + if (!(__raw_readl(sfrbase + REG_MMU_STATUS) & 1)) { + sysmmu_unblock(sfrbase); + return false; + } + + return true; +} + +static void __sysmmu_tlb_invalidate(void __iomem *sfrbase) +{ + __raw_writel(0x1, sfrbase + REG_MMU_FLUSH); +} + +static void __sysmmu_tlb_invalidate_entry(void __iomem *sfrbase, + sysmmu_iova_t iova, unsigned int num_inv) +{ + unsigned int i; + + for (i = 0; i < num_inv; i++) { + __raw_writel((iova & SPAGE_MASK) | 1, + sfrbase + REG_MMU_FLUSH_ENTRY); + iova += SPAGE_SIZE; + } +} + +static void __sysmmu_set_ptbase(void __iomem *sfrbase, + phys_addr_t pgd) +{ + __raw_writel(pgd, sfrbase + REG_PT_BASE_ADDR); + + __sysmmu_tlb_invalidate(sfrbase); +} + +static void show_fault_information(const char *name, + enum exynos_sysmmu_inttype itype, + phys_addr_t pgtable_base, sysmmu_iova_t fault_addr) +{ + sysmmu_pte_t *ent; + + if ((itype >= SYSMMU_FAULTS_NUM) || (itype < SYSMMU_PAGEFAULT)) + itype = SYSMMU_FAULT_UNKNOWN; + + pr_err("%s occurred at %#x by %s(Page table base: %pa)\n", + sysmmu_fault_name[itype], fault_addr, name, &pgtable_base); + + ent = section_entry(phys_to_virt(pgtable_base), fault_addr); + pr_err("\tLv1 entry: %#x\n", *ent); + + if (lv1ent_page(ent)) { + ent = page_entry(ent, fault_addr); + pr_err("\t Lv2 entry: %#x\n", *ent); + } +} + +static irqreturn_t exynos_sysmmu_irq(int irq, void *dev_id) +{ + /* SYSMMU is in blocked state when interrupt occurred. */ + struct sysmmu_drvdata *data = dev_id; + enum exynos_sysmmu_inttype itype; + sysmmu_iova_t addr = -1; + int ret = -ENOSYS; + + WARN_ON(!is_sysmmu_active(data)); + + spin_lock(&data->lock); + + if (!IS_ERR(data->clk_master)) + clk_enable(data->clk_master); + + itype = (enum exynos_sysmmu_inttype) + __ffs(__raw_readl(data->sfrbase + REG_INT_STATUS)); + if (WARN_ON(!((itype >= 0) && (itype < SYSMMU_FAULT_UNKNOWN)))) + itype = SYSMMU_FAULT_UNKNOWN; + else + addr = __raw_readl(data->sfrbase + fault_reg_offset[itype]); + + if (itype == SYSMMU_FAULT_UNKNOWN) { + pr_err("%s: Fault is not occurred by System MMU '%s'!\n", + __func__, dev_name(data->sysmmu)); + pr_err("%s: Please check if IRQ is correctly configured.\n", + __func__); + BUG(); + } else { + unsigned int base = + __raw_readl(data->sfrbase + REG_PT_BASE_ADDR); + show_fault_information(dev_name(data->sysmmu), + itype, base, addr); + if (data->domain) + ret = report_iommu_fault(data->domain, + data->master, addr, itype); + } + + /* fault is not recovered by fault handler */ + BUG_ON(ret != 0); + + __raw_writel(1 << itype, data->sfrbase + REG_INT_CLEAR); + + sysmmu_unblock(data->sfrbase); + + if (!IS_ERR(data->clk_master)) + clk_disable(data->clk_master); + + spin_unlock(&data->lock); + + return IRQ_HANDLED; +} + +static void __sysmmu_disable_nocount(struct sysmmu_drvdata *data) +{ + if (!IS_ERR(data->clk_master)) + clk_enable(data->clk_master); + + __raw_writel(CTRL_DISABLE, data->sfrbase + REG_MMU_CTRL); + __raw_writel(0, data->sfrbase + REG_MMU_CFG); + + clk_disable(data->clk); + if (!IS_ERR(data->clk_master)) + clk_disable(data->clk_master); +} + +static bool __sysmmu_disable(struct sysmmu_drvdata *data) +{ + bool disabled; + unsigned long flags; + + spin_lock_irqsave(&data->lock, flags); + + disabled = set_sysmmu_inactive(data); + + if (disabled) { + data->pgtable = 0; + data->domain = NULL; + + __sysmmu_disable_nocount(data); + + dev_dbg(data->sysmmu, "Disabled\n"); + } else { + dev_dbg(data->sysmmu, "%d times left to disable\n", + data->activations); + } + + spin_unlock_irqrestore(&data->lock, flags); + + return disabled; +} + +static void __sysmmu_init_config(struct sysmmu_drvdata *data) +{ + unsigned int cfg = CFG_LRU | CFG_QOS(15); + unsigned int ver; + + ver = __raw_sysmmu_version(data); + if (MMU_MAJ_VER(ver) == 3) { + if (MMU_MIN_VER(ver) >= 2) { + cfg |= CFG_FLPDCACHE; + if (MMU_MIN_VER(ver) == 3) { + cfg |= CFG_ACGEN; + cfg &= ~CFG_LRU; + } else { + cfg |= CFG_SYSSEL; + } + } + } + + __raw_writel(cfg, data->sfrbase + REG_MMU_CFG); +} + +static void __sysmmu_enable_nocount(struct sysmmu_drvdata *data) +{ + if (!IS_ERR(data->clk_master)) + clk_enable(data->clk_master); + clk_enable(data->clk); + + __raw_writel(CTRL_BLOCK, data->sfrbase + REG_MMU_CTRL); + + __sysmmu_init_config(data); + + __sysmmu_set_ptbase(data->sfrbase, data->pgtable); + + __raw_writel(CTRL_ENABLE, data->sfrbase + REG_MMU_CTRL); + + if (!IS_ERR(data->clk_master)) + clk_disable(data->clk_master); +} + +static int __sysmmu_enable(struct sysmmu_drvdata *data, + phys_addr_t pgtable, struct iommu_domain *domain) +{ + int ret = 0; + unsigned long flags; + + spin_lock_irqsave(&data->lock, flags); + if (set_sysmmu_active(data)) { + data->pgtable = pgtable; + data->domain = domain; + + __sysmmu_enable_nocount(data); + + dev_dbg(data->sysmmu, "Enabled\n"); + } else { + ret = (pgtable == data->pgtable) ? 1 : -EBUSY; + + dev_dbg(data->sysmmu, "already enabled\n"); + } + + if (WARN_ON(ret < 0)) + set_sysmmu_inactive(data); /* decrement count */ + + spin_unlock_irqrestore(&data->lock, flags); + + return ret; +} + +/* __exynos_sysmmu_enable: Enables System MMU + * + * returns -error if an error occurred and System MMU is not enabled, + * 0 if the System MMU has been just enabled and 1 if System MMU was already + * enabled before. + */ +static int __exynos_sysmmu_enable(struct device *dev, phys_addr_t pgtable, + struct iommu_domain *domain) +{ + int ret = 0; + unsigned long flags; + struct exynos_iommu_owner *owner = dev->archdata.iommu; + struct sysmmu_drvdata *data; + + BUG_ON(!has_sysmmu(dev)); + + spin_lock_irqsave(&owner->lock, flags); + + data = dev_get_drvdata(owner->sysmmu); + + ret = __sysmmu_enable(data, pgtable, domain); + if (ret >= 0) + data->master = dev; + + spin_unlock_irqrestore(&owner->lock, flags); + + return ret; +} + +int exynos_sysmmu_enable(struct device *dev, phys_addr_t pgtable) +{ + BUG_ON(!memblock_is_memory(pgtable)); + + return __exynos_sysmmu_enable(dev, pgtable, NULL); +} + +static bool exynos_sysmmu_disable(struct device *dev) +{ + unsigned long flags; + bool disabled = true; + struct exynos_iommu_owner *owner = dev->archdata.iommu; + struct sysmmu_drvdata *data; + + BUG_ON(!has_sysmmu(dev)); + + spin_lock_irqsave(&owner->lock, flags); + + data = dev_get_drvdata(owner->sysmmu); + + disabled = __sysmmu_disable(data); + if (disabled) + data->master = NULL; + + spin_unlock_irqrestore(&owner->lock, flags); + + return disabled; +} + +static void __sysmmu_tlb_invalidate_flpdcache(struct sysmmu_drvdata *data, + sysmmu_iova_t iova) +{ + if (__raw_sysmmu_version(data) == MAKE_MMU_VER(3, 3)) + __raw_writel(iova | 0x1, data->sfrbase + REG_MMU_FLUSH_ENTRY); +} + +static void sysmmu_tlb_invalidate_flpdcache(struct device *dev, + sysmmu_iova_t iova) +{ + unsigned long flags; + struct exynos_iommu_owner *owner = dev->archdata.iommu; + struct sysmmu_drvdata *data = dev_get_drvdata(owner->sysmmu); + + if (!IS_ERR(data->clk_master)) + clk_enable(data->clk_master); + + spin_lock_irqsave(&data->lock, flags); + if (is_sysmmu_active(data)) + __sysmmu_tlb_invalidate_flpdcache(data, iova); + spin_unlock_irqrestore(&data->lock, flags); + + if (!IS_ERR(data->clk_master)) + clk_disable(data->clk_master); +} + +static void sysmmu_tlb_invalidate_entry(struct device *dev, sysmmu_iova_t iova, + size_t size) +{ + struct exynos_iommu_owner *owner = dev->archdata.iommu; + unsigned long flags; + struct sysmmu_drvdata *data; + + data = dev_get_drvdata(owner->sysmmu); + + spin_lock_irqsave(&data->lock, flags); + if (is_sysmmu_active(data)) { + unsigned int num_inv = 1; + + if (!IS_ERR(data->clk_master)) + clk_enable(data->clk_master); + + /* + * L2TLB invalidation required + * 4KB page: 1 invalidation + * 64KB page: 16 invalidations + * 1MB page: 64 invalidations + * because it is set-associative TLB + * with 8-way and 64 sets. + * 1MB page can be cached in one of all sets. + * 64KB page can be one of 16 consecutive sets. + */ + if (MMU_MAJ_VER(__raw_sysmmu_version(data)) == 2) + num_inv = min_t(unsigned int, size / PAGE_SIZE, 64); + + if (sysmmu_block(data->sfrbase)) { + __sysmmu_tlb_invalidate_entry( + data->sfrbase, iova, num_inv); + sysmmu_unblock(data->sfrbase); + } + if (!IS_ERR(data->clk_master)) + clk_disable(data->clk_master); + } else { + dev_dbg(dev, "disabled. Skipping TLB invalidation @ %#x\n", + iova); + } + spin_unlock_irqrestore(&data->lock, flags); +} + +void exynos_sysmmu_tlb_invalidate(struct device *dev) +{ + struct exynos_iommu_owner *owner = dev->archdata.iommu; + unsigned long flags; + struct sysmmu_drvdata *data; + + data = dev_get_drvdata(owner->sysmmu); + + spin_lock_irqsave(&data->lock, flags); + if (is_sysmmu_active(data)) { + if (!IS_ERR(data->clk_master)) + clk_enable(data->clk_master); + if (sysmmu_block(data->sfrbase)) { + __sysmmu_tlb_invalidate(data->sfrbase); + sysmmu_unblock(data->sfrbase); + } + if (!IS_ERR(data->clk_master)) + clk_disable(data->clk_master); + } else { + dev_dbg(dev, "disabled. Skipping TLB invalidation\n"); + } + spin_unlock_irqrestore(&data->lock, flags); +} + +static int __init exynos_sysmmu_probe(struct platform_device *pdev) +{ + int irq, ret; + struct device *dev = &pdev->dev; + struct sysmmu_drvdata *data; + struct resource *res; + + data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + data->sfrbase = devm_ioremap_resource(dev, res); + if (IS_ERR(data->sfrbase)) + return PTR_ERR(data->sfrbase); + + irq = platform_get_irq(pdev, 0); + if (irq <= 0) { + dev_err(dev, "Unable to find IRQ resource\n"); + return irq; + } + + ret = devm_request_irq(dev, irq, exynos_sysmmu_irq, 0, + dev_name(dev), data); + if (ret) { + dev_err(dev, "Unabled to register handler of irq %d\n", irq); + return ret; + } + + data->clk = devm_clk_get(dev, "sysmmu"); + if (IS_ERR(data->clk)) { + dev_err(dev, "Failed to get clock!\n"); + return PTR_ERR(data->clk); + } else { + ret = clk_prepare(data->clk); + if (ret) { + dev_err(dev, "Failed to prepare clk\n"); + return ret; + } + } + + data->clk_master = devm_clk_get(dev, "master"); + if (!IS_ERR(data->clk_master)) { + ret = clk_prepare(data->clk_master); + if (ret) { + clk_unprepare(data->clk); + dev_err(dev, "Failed to prepare master's clk\n"); + return ret; + } + } + + data->sysmmu = dev; + spin_lock_init(&data->lock); + + platform_set_drvdata(pdev, data); + + pm_runtime_enable(dev); + + return 0; +} + +static const struct of_device_id sysmmu_of_match[] __initconst = { + { .compatible = "samsung,exynos-sysmmu", }, + { }, +}; + +static struct platform_driver exynos_sysmmu_driver __refdata = { + .probe = exynos_sysmmu_probe, + .driver = { + .name = "exynos-sysmmu", + .of_match_table = sysmmu_of_match, + } +}; + +static inline void pgtable_flush(void *vastart, void *vaend) +{ + dmac_flush_range(vastart, vaend); + outer_flush_range(virt_to_phys(vastart), + virt_to_phys(vaend)); +} + +static struct iommu_domain *exynos_iommu_domain_alloc(unsigned type) +{ + struct exynos_iommu_domain *exynos_domain; + int i; + + if (type != IOMMU_DOMAIN_UNMANAGED) + return NULL; + + exynos_domain = kzalloc(sizeof(*exynos_domain), GFP_KERNEL); + if (!exynos_domain) + return NULL; + + exynos_domain->pgtable = (sysmmu_pte_t *)__get_free_pages(GFP_KERNEL, 2); + if (!exynos_domain->pgtable) + goto err_pgtable; + + exynos_domain->lv2entcnt = (short *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1); + if (!exynos_domain->lv2entcnt) + goto err_counter; + + /* Workaround for System MMU v3.3 to prevent caching 1MiB mapping */ + for (i = 0; i < NUM_LV1ENTRIES; i += 8) { + exynos_domain->pgtable[i + 0] = ZERO_LV2LINK; + exynos_domain->pgtable[i + 1] = ZERO_LV2LINK; + exynos_domain->pgtable[i + 2] = ZERO_LV2LINK; + exynos_domain->pgtable[i + 3] = ZERO_LV2LINK; + exynos_domain->pgtable[i + 4] = ZERO_LV2LINK; + exynos_domain->pgtable[i + 5] = ZERO_LV2LINK; + exynos_domain->pgtable[i + 6] = ZERO_LV2LINK; + exynos_domain->pgtable[i + 7] = ZERO_LV2LINK; + } + + pgtable_flush(exynos_domain->pgtable, exynos_domain->pgtable + NUM_LV1ENTRIES); + + spin_lock_init(&exynos_domain->lock); + spin_lock_init(&exynos_domain->pgtablelock); + INIT_LIST_HEAD(&exynos_domain->clients); + + exynos_domain->domain.geometry.aperture_start = 0; + exynos_domain->domain.geometry.aperture_end = ~0UL; + exynos_domain->domain.geometry.force_aperture = true; + + return &exynos_domain->domain; + +err_counter: + free_pages((unsigned long)exynos_domain->pgtable, 2); +err_pgtable: + kfree(exynos_domain); + return NULL; +} + +static void exynos_iommu_domain_free(struct iommu_domain *domain) +{ + struct exynos_iommu_domain *priv = to_exynos_domain(domain); + struct exynos_iommu_owner *owner; + unsigned long flags; + int i; + + WARN_ON(!list_empty(&priv->clients)); + + spin_lock_irqsave(&priv->lock, flags); + + list_for_each_entry(owner, &priv->clients, client) { + while (!exynos_sysmmu_disable(owner->dev)) + ; /* until System MMU is actually disabled */ + } + + while (!list_empty(&priv->clients)) + list_del_init(priv->clients.next); + + spin_unlock_irqrestore(&priv->lock, flags); + + for (i = 0; i < NUM_LV1ENTRIES; i++) + if (lv1ent_page(priv->pgtable + i)) + kmem_cache_free(lv2table_kmem_cache, + phys_to_virt(lv2table_base(priv->pgtable + i))); + + free_pages((unsigned long)priv->pgtable, 2); + free_pages((unsigned long)priv->lv2entcnt, 1); + kfree(priv); +} + +static int exynos_iommu_attach_device(struct iommu_domain *domain, + struct device *dev) +{ + struct exynos_iommu_owner *owner = dev->archdata.iommu; + struct exynos_iommu_domain *priv = to_exynos_domain(domain); + phys_addr_t pagetable = virt_to_phys(priv->pgtable); + unsigned long flags; + int ret; + + spin_lock_irqsave(&priv->lock, flags); + + ret = __exynos_sysmmu_enable(dev, pagetable, domain); + if (ret == 0) { + list_add_tail(&owner->client, &priv->clients); + owner->domain = domain; + } + + spin_unlock_irqrestore(&priv->lock, flags); + + if (ret < 0) { + dev_err(dev, "%s: Failed to attach IOMMU with pgtable %pa\n", + __func__, &pagetable); + return ret; + } + + dev_dbg(dev, "%s: Attached IOMMU with pgtable %pa %s\n", + __func__, &pagetable, (ret == 0) ? "" : ", again"); + + return ret; +} + +static void exynos_iommu_detach_device(struct iommu_domain *domain, + struct device *dev) +{ + struct exynos_iommu_owner *owner; + struct exynos_iommu_domain *priv = to_exynos_domain(domain); + phys_addr_t pagetable = virt_to_phys(priv->pgtable); + unsigned long flags; + + spin_lock_irqsave(&priv->lock, flags); + + list_for_each_entry(owner, &priv->clients, client) { + if (owner == dev->archdata.iommu) { + if (exynos_sysmmu_disable(dev)) { + list_del_init(&owner->client); + owner->domain = NULL; + } + break; + } + } + + spin_unlock_irqrestore(&priv->lock, flags); + + if (owner == dev->archdata.iommu) + dev_dbg(dev, "%s: Detached IOMMU with pgtable %pa\n", + __func__, &pagetable); + else + dev_err(dev, "%s: No IOMMU is attached\n", __func__); +} + +static sysmmu_pte_t *alloc_lv2entry(struct exynos_iommu_domain *priv, + sysmmu_pte_t *sent, sysmmu_iova_t iova, short *pgcounter) +{ + if (lv1ent_section(sent)) { + WARN(1, "Trying mapping on %#08x mapped with 1MiB page", iova); + return ERR_PTR(-EADDRINUSE); + } + + if (lv1ent_fault(sent)) { + sysmmu_pte_t *pent; + bool need_flush_flpd_cache = lv1ent_zero(sent); + + pent = kmem_cache_zalloc(lv2table_kmem_cache, GFP_ATOMIC); + BUG_ON((unsigned int)pent & (LV2TABLE_SIZE - 1)); + if (!pent) + return ERR_PTR(-ENOMEM); + + *sent = mk_lv1ent_page(virt_to_phys(pent)); + *pgcounter = NUM_LV2ENTRIES; + pgtable_flush(pent, pent + NUM_LV2ENTRIES); + pgtable_flush(sent, sent + 1); + + /* + * If pre-fetched SLPD is a faulty SLPD in zero_l2_table, + * FLPD cache may cache the address of zero_l2_table. This + * function replaces the zero_l2_table with new L2 page table + * to write valid mappings. + * Accessing the valid area may cause page fault since FLPD + * cache may still cache zero_l2_table for the valid area + * instead of new L2 page table that has the mapping + * information of the valid area. + * Thus any replacement of zero_l2_table with other valid L2 + * page table must involve FLPD cache invalidation for System + * MMU v3.3. + * FLPD cache invalidation is performed with TLB invalidation + * by VPN without blocking. It is safe to invalidate TLB without + * blocking because the target address of TLB invalidation is + * not currently mapped. + */ + if (need_flush_flpd_cache) { + struct exynos_iommu_owner *owner; + + spin_lock(&priv->lock); + list_for_each_entry(owner, &priv->clients, client) + sysmmu_tlb_invalidate_flpdcache( + owner->dev, iova); + spin_unlock(&priv->lock); + } + } + + return page_entry(sent, iova); +} + +static int lv1set_section(struct exynos_iommu_domain *priv, + sysmmu_pte_t *sent, sysmmu_iova_t iova, + phys_addr_t paddr, short *pgcnt) +{ + if (lv1ent_section(sent)) { + WARN(1, "Trying mapping on 1MiB@%#08x that is mapped", + iova); + return -EADDRINUSE; + } + + if (lv1ent_page(sent)) { + if (*pgcnt != NUM_LV2ENTRIES) { + WARN(1, "Trying mapping on 1MiB@%#08x that is mapped", + iova); + return -EADDRINUSE; + } + + kmem_cache_free(lv2table_kmem_cache, page_entry(sent, 0)); + *pgcnt = 0; + } + + *sent = mk_lv1ent_sect(paddr); + + pgtable_flush(sent, sent + 1); + + spin_lock(&priv->lock); + if (lv1ent_page_zero(sent)) { + struct exynos_iommu_owner *owner; + /* + * Flushing FLPD cache in System MMU v3.3 that may cache a FLPD + * entry by speculative prefetch of SLPD which has no mapping. + */ + list_for_each_entry(owner, &priv->clients, client) + sysmmu_tlb_invalidate_flpdcache(owner->dev, iova); + } + spin_unlock(&priv->lock); + + return 0; +} + +static int lv2set_page(sysmmu_pte_t *pent, phys_addr_t paddr, size_t size, + short *pgcnt) +{ + if (size == SPAGE_SIZE) { + if (WARN_ON(!lv2ent_fault(pent))) + return -EADDRINUSE; + + *pent = mk_lv2ent_spage(paddr); + pgtable_flush(pent, pent + 1); + *pgcnt -= 1; + } else { /* size == LPAGE_SIZE */ + int i; + + for (i = 0; i < SPAGES_PER_LPAGE; i++, pent++) { + if (WARN_ON(!lv2ent_fault(pent))) { + if (i > 0) + memset(pent - i, 0, sizeof(*pent) * i); + return -EADDRINUSE; + } + + *pent = mk_lv2ent_lpage(paddr); + } + pgtable_flush(pent - SPAGES_PER_LPAGE, pent); + *pgcnt -= SPAGES_PER_LPAGE; + } + + return 0; +} + +/* + * *CAUTION* to the I/O virtual memory managers that support exynos-iommu: + * + * System MMU v3.x has advanced logic to improve address translation + * performance with caching more page table entries by a page table walk. + * However, the logic has a bug that while caching faulty page table entries, + * System MMU reports page fault if the cached fault entry is hit even though + * the fault entry is updated to a valid entry after the entry is cached. + * To prevent caching faulty page table entries which may be updated to valid + * entries later, the virtual memory manager should care about the workaround + * for the problem. The following describes the workaround. + * + * Any two consecutive I/O virtual address regions must have a hole of 128KiB + * at maximum to prevent misbehavior of System MMU 3.x (workaround for h/w bug). + * + * Precisely, any start address of I/O virtual region must be aligned with + * the following sizes for System MMU v3.1 and v3.2. + * System MMU v3.1: 128KiB + * System MMU v3.2: 256KiB + * + * Because System MMU v3.3 caches page table entries more aggressively, it needs + * more workarounds. + * - Any two consecutive I/O virtual regions must have a hole of size larger + * than or equal to 128KiB. + * - Start address of an I/O virtual region must be aligned by 128KiB. + */ +static int exynos_iommu_map(struct iommu_domain *domain, unsigned long l_iova, + phys_addr_t paddr, size_t size, int prot) +{ + struct exynos_iommu_domain *priv = to_exynos_domain(domain); + sysmmu_pte_t *entry; + sysmmu_iova_t iova = (sysmmu_iova_t)l_iova; + unsigned long flags; + int ret = -ENOMEM; + + BUG_ON(priv->pgtable == NULL); + + spin_lock_irqsave(&priv->pgtablelock, flags); + + entry = section_entry(priv->pgtable, iova); + + if (size == SECT_SIZE) { + ret = lv1set_section(priv, entry, iova, paddr, + &priv->lv2entcnt[lv1ent_offset(iova)]); + } else { + sysmmu_pte_t *pent; + + pent = alloc_lv2entry(priv, entry, iova, + &priv->lv2entcnt[lv1ent_offset(iova)]); + + if (IS_ERR(pent)) + ret = PTR_ERR(pent); + else + ret = lv2set_page(pent, paddr, size, + &priv->lv2entcnt[lv1ent_offset(iova)]); + } + + if (ret) + pr_err("%s: Failed(%d) to map %#zx bytes @ %#x\n", + __func__, ret, size, iova); + + spin_unlock_irqrestore(&priv->pgtablelock, flags); + + return ret; +} + +static void exynos_iommu_tlb_invalidate_entry(struct exynos_iommu_domain *priv, + sysmmu_iova_t iova, size_t size) +{ + struct exynos_iommu_owner *owner; + unsigned long flags; + + spin_lock_irqsave(&priv->lock, flags); + + list_for_each_entry(owner, &priv->clients, client) + sysmmu_tlb_invalidate_entry(owner->dev, iova, size); + + spin_unlock_irqrestore(&priv->lock, flags); +} + +static size_t exynos_iommu_unmap(struct iommu_domain *domain, + unsigned long l_iova, size_t size) +{ + struct exynos_iommu_domain *priv = to_exynos_domain(domain); + sysmmu_iova_t iova = (sysmmu_iova_t)l_iova; + sysmmu_pte_t *ent; + size_t err_pgsize; + unsigned long flags; + + BUG_ON(priv->pgtable == NULL); + + spin_lock_irqsave(&priv->pgtablelock, flags); + + ent = section_entry(priv->pgtable, iova); + + if (lv1ent_section(ent)) { + if (WARN_ON(size < SECT_SIZE)) { + err_pgsize = SECT_SIZE; + goto err; + } + + /* workaround for h/w bug in System MMU v3.3 */ + *ent = ZERO_LV2LINK; + pgtable_flush(ent, ent + 1); + size = SECT_SIZE; + goto done; + } + + if (unlikely(lv1ent_fault(ent))) { + if (size > SECT_SIZE) + size = SECT_SIZE; + goto done; + } + + /* lv1ent_page(sent) == true here */ + + ent = page_entry(ent, iova); + + if (unlikely(lv2ent_fault(ent))) { + size = SPAGE_SIZE; + goto done; + } + + if (lv2ent_small(ent)) { + *ent = 0; + size = SPAGE_SIZE; + pgtable_flush(ent, ent + 1); + priv->lv2entcnt[lv1ent_offset(iova)] += 1; + goto done; + } + + /* lv1ent_large(ent) == true here */ + if (WARN_ON(size < LPAGE_SIZE)) { + err_pgsize = LPAGE_SIZE; + goto err; + } + + memset(ent, 0, sizeof(*ent) * SPAGES_PER_LPAGE); + pgtable_flush(ent, ent + SPAGES_PER_LPAGE); + + size = LPAGE_SIZE; + priv->lv2entcnt[lv1ent_offset(iova)] += SPAGES_PER_LPAGE; +done: + spin_unlock_irqrestore(&priv->pgtablelock, flags); + + exynos_iommu_tlb_invalidate_entry(priv, iova, size); + + return size; +err: + spin_unlock_irqrestore(&priv->pgtablelock, flags); + + pr_err("%s: Failed: size(%#zx) @ %#x is smaller than page size %#zx\n", + __func__, size, iova, err_pgsize); + + return 0; +} + +static phys_addr_t exynos_iommu_iova_to_phys(struct iommu_domain *domain, + dma_addr_t iova) +{ + struct exynos_iommu_domain *priv = to_exynos_domain(domain); + sysmmu_pte_t *entry; + unsigned long flags; + phys_addr_t phys = 0; + + spin_lock_irqsave(&priv->pgtablelock, flags); + + entry = section_entry(priv->pgtable, iova); + + if (lv1ent_section(entry)) { + phys = section_phys(entry) + section_offs(iova); + } else if (lv1ent_page(entry)) { + entry = page_entry(entry, iova); + + if (lv2ent_large(entry)) + phys = lpage_phys(entry) + lpage_offs(iova); + else if (lv2ent_small(entry)) + phys = spage_phys(entry) + spage_offs(iova); + } + + spin_unlock_irqrestore(&priv->pgtablelock, flags); + + return phys; +} + +static int exynos_iommu_add_device(struct device *dev) +{ + struct iommu_group *group; + int ret; + + group = iommu_group_get(dev); + + if (!group) { + group = iommu_group_alloc(); + if (IS_ERR(group)) { + dev_err(dev, "Failed to allocate IOMMU group\n"); + return PTR_ERR(group); + } + } + + ret = iommu_group_add_device(group, dev); + iommu_group_put(group); + + return ret; +} + +static void exynos_iommu_remove_device(struct device *dev) +{ + iommu_group_remove_device(dev); +} + +static const struct iommu_ops exynos_iommu_ops = { + .domain_alloc = exynos_iommu_domain_alloc, + .domain_free = exynos_iommu_domain_free, + .attach_dev = exynos_iommu_attach_device, + .detach_dev = exynos_iommu_detach_device, + .map = exynos_iommu_map, + .unmap = exynos_iommu_unmap, + .map_sg = default_iommu_map_sg, + .iova_to_phys = exynos_iommu_iova_to_phys, + .add_device = exynos_iommu_add_device, + .remove_device = exynos_iommu_remove_device, + .pgsize_bitmap = SECT_SIZE | LPAGE_SIZE | SPAGE_SIZE, +}; + +static int __init exynos_iommu_init(void) +{ + struct device_node *np; + int ret; + + np = of_find_matching_node(NULL, sysmmu_of_match); + if (!np) + return 0; + + of_node_put(np); + + lv2table_kmem_cache = kmem_cache_create("exynos-iommu-lv2table", + LV2TABLE_SIZE, LV2TABLE_SIZE, 0, NULL); + if (!lv2table_kmem_cache) { + pr_err("%s: Failed to create kmem cache\n", __func__); + return -ENOMEM; + } + + ret = platform_driver_register(&exynos_sysmmu_driver); + if (ret) { + pr_err("%s: Failed to register driver\n", __func__); + goto err_reg_driver; + } + + zero_lv2_table = kmem_cache_zalloc(lv2table_kmem_cache, GFP_KERNEL); + if (zero_lv2_table == NULL) { + pr_err("%s: Failed to allocate zero level2 page table\n", + __func__); + ret = -ENOMEM; + goto err_zero_lv2; + } + + ret = bus_set_iommu(&platform_bus_type, &exynos_iommu_ops); + if (ret) { + pr_err("%s: Failed to register exynos-iommu driver.\n", + __func__); + goto err_set_iommu; + } + + return 0; +err_set_iommu: + kmem_cache_free(lv2table_kmem_cache, zero_lv2_table); +err_zero_lv2: + platform_driver_unregister(&exynos_sysmmu_driver); +err_reg_driver: + kmem_cache_destroy(lv2table_kmem_cache); + return ret; +} +subsys_initcall(exynos_iommu_init); diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c new file mode 100644 index 000000000..abeedc9a7 --- /dev/null +++ b/drivers/iommu/fsl_pamu.c @@ -0,0 +1,1283 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright (C) 2013 Freescale Semiconductor, Inc. + * + */ + +#define pr_fmt(fmt) "fsl-pamu: %s: " fmt, __func__ + +#include "fsl_pamu.h" + +#include <linux/interrupt.h> +#include <linux/genalloc.h> + +#include <asm/mpc85xx.h> +#include <asm/fsl_guts.h> + +/* define indexes for each operation mapping scenario */ +#define OMI_QMAN 0x00 +#define OMI_FMAN 0x01 +#define OMI_QMAN_PRIV 0x02 +#define OMI_CAAM 0x03 + +#define make64(high, low) (((u64)(high) << 32) | (low)) + +struct pamu_isr_data { + void __iomem *pamu_reg_base; /* Base address of PAMU regs */ + unsigned int count; /* The number of PAMUs */ +}; + +static struct paace *ppaact; +static struct paace *spaact; +static struct ome *omt __initdata; + +/* + * Table for matching compatible strings, for device tree + * guts node, for QorIQ SOCs. + * "fsl,qoriq-device-config-2.0" corresponds to T4 & B4 + * SOCs. For the older SOCs "fsl,qoriq-device-config-1.0" + * string would be used. + */ +static const struct of_device_id guts_device_ids[] __initconst = { + { .compatible = "fsl,qoriq-device-config-1.0", }, + { .compatible = "fsl,qoriq-device-config-2.0", }, + {} +}; + +/* + * Table for matching compatible strings, for device tree + * L3 cache controller node. + * "fsl,t4240-l3-cache-controller" corresponds to T4, + * "fsl,b4860-l3-cache-controller" corresponds to B4 & + * "fsl,p4080-l3-cache-controller" corresponds to other, + * SOCs. + */ +static const struct of_device_id l3_device_ids[] = { + { .compatible = "fsl,t4240-l3-cache-controller", }, + { .compatible = "fsl,b4860-l3-cache-controller", }, + { .compatible = "fsl,p4080-l3-cache-controller", }, + {} +}; + +/* maximum subwindows permitted per liodn */ +static u32 max_subwindow_count; + +/* Pool for fspi allocation */ +static struct gen_pool *spaace_pool; + +/** + * pamu_get_max_subwin_cnt() - Return the maximum supported + * subwindow count per liodn. + * + */ +u32 pamu_get_max_subwin_cnt(void) +{ + return max_subwindow_count; +} + +/** + * pamu_get_ppaace() - Return the primary PACCE + * @liodn: liodn PAACT index for desired PAACE + * + * Returns the ppace pointer upon success else return + * null. + */ +static struct paace *pamu_get_ppaace(int liodn) +{ + if (!ppaact || liodn >= PAACE_NUMBER_ENTRIES) { + pr_debug("PPAACT doesn't exist\n"); + return NULL; + } + + return &ppaact[liodn]; +} + +/** + * pamu_enable_liodn() - Set valid bit of PACCE + * @liodn: liodn PAACT index for desired PAACE + * + * Returns 0 upon success else error code < 0 returned + */ +int pamu_enable_liodn(int liodn) +{ + struct paace *ppaace; + + ppaace = pamu_get_ppaace(liodn); + if (!ppaace) { + pr_debug("Invalid primary paace entry\n"); + return -ENOENT; + } + + if (!get_bf(ppaace->addr_bitfields, PPAACE_AF_WSE)) { + pr_debug("liodn %d not configured\n", liodn); + return -EINVAL; + } + + /* Ensure that all other stores to the ppaace complete first */ + mb(); + + set_bf(ppaace->addr_bitfields, PAACE_AF_V, PAACE_V_VALID); + mb(); + + return 0; +} + +/** + * pamu_disable_liodn() - Clears valid bit of PACCE + * @liodn: liodn PAACT index for desired PAACE + * + * Returns 0 upon success else error code < 0 returned + */ +int pamu_disable_liodn(int liodn) +{ + struct paace *ppaace; + + ppaace = pamu_get_ppaace(liodn); + if (!ppaace) { + pr_debug("Invalid primary paace entry\n"); + return -ENOENT; + } + + set_bf(ppaace->addr_bitfields, PAACE_AF_V, PAACE_V_INVALID); + mb(); + + return 0; +} + +/* Derive the window size encoding for a particular PAACE entry */ +static unsigned int map_addrspace_size_to_wse(phys_addr_t addrspace_size) +{ + /* Bug if not a power of 2 */ + BUG_ON(addrspace_size & (addrspace_size - 1)); + + /* window size is 2^(WSE+1) bytes */ + return fls64(addrspace_size) - 2; +} + +/* Derive the PAACE window count encoding for the subwindow count */ +static unsigned int map_subwindow_cnt_to_wce(u32 subwindow_cnt) +{ + /* window count is 2^(WCE+1) bytes */ + return __ffs(subwindow_cnt) - 1; +} + +/* + * Set the PAACE type as primary and set the coherency required domain + * attribute + */ +static void pamu_init_ppaace(struct paace *ppaace) +{ + set_bf(ppaace->addr_bitfields, PAACE_AF_PT, PAACE_PT_PRIMARY); + + set_bf(ppaace->domain_attr.to_host.coherency_required, PAACE_DA_HOST_CR, + PAACE_M_COHERENCE_REQ); +} + +/* + * Set the PAACE type as secondary and set the coherency required domain + * attribute. + */ +static void pamu_init_spaace(struct paace *spaace) +{ + set_bf(spaace->addr_bitfields, PAACE_AF_PT, PAACE_PT_SECONDARY); + set_bf(spaace->domain_attr.to_host.coherency_required, PAACE_DA_HOST_CR, + PAACE_M_COHERENCE_REQ); +} + +/* + * Return the spaace (corresponding to the secondary window index) + * for a particular ppaace. + */ +static struct paace *pamu_get_spaace(struct paace *paace, u32 wnum) +{ + u32 subwin_cnt; + struct paace *spaace = NULL; + + subwin_cnt = 1UL << (get_bf(paace->impl_attr, PAACE_IA_WCE) + 1); + + if (wnum < subwin_cnt) + spaace = &spaact[paace->fspi + wnum]; + else + pr_debug("secondary paace out of bounds\n"); + + return spaace; +} + +/** + * pamu_get_fspi_and_allocate() - Allocates fspi index and reserves subwindows + * required for primary PAACE in the secondary + * PAACE table. + * @subwin_cnt: Number of subwindows to be reserved. + * + * A PPAACE entry may have a number of associated subwindows. A subwindow + * corresponds to a SPAACE entry in the SPAACT table. Each PAACE entry stores + * the index (fspi) of the first SPAACE entry in the SPAACT table. This + * function returns the index of the first SPAACE entry. The remaining + * SPAACE entries are reserved contiguously from that index. + * + * Returns a valid fspi index in the range of 0 - SPAACE_NUMBER_ENTRIES on success. + * If no SPAACE entry is available or the allocator can not reserve the required + * number of contiguous entries function returns ULONG_MAX indicating a failure. + * + */ +static unsigned long pamu_get_fspi_and_allocate(u32 subwin_cnt) +{ + unsigned long spaace_addr; + + spaace_addr = gen_pool_alloc(spaace_pool, subwin_cnt * sizeof(struct paace)); + if (!spaace_addr) + return ULONG_MAX; + + return (spaace_addr - (unsigned long)spaact) / (sizeof(struct paace)); +} + +/* Release the subwindows reserved for a particular LIODN */ +void pamu_free_subwins(int liodn) +{ + struct paace *ppaace; + u32 subwin_cnt, size; + + ppaace = pamu_get_ppaace(liodn); + if (!ppaace) { + pr_debug("Invalid liodn entry\n"); + return; + } + + if (get_bf(ppaace->addr_bitfields, PPAACE_AF_MW)) { + subwin_cnt = 1UL << (get_bf(ppaace->impl_attr, PAACE_IA_WCE) + 1); + size = (subwin_cnt - 1) * sizeof(struct paace); + gen_pool_free(spaace_pool, (unsigned long)&spaact[ppaace->fspi], size); + set_bf(ppaace->addr_bitfields, PPAACE_AF_MW, 0); + } +} + +/* + * Function used for updating stash destination for the coressponding + * LIODN. + */ +int pamu_update_paace_stash(int liodn, u32 subwin, u32 value) +{ + struct paace *paace; + + paace = pamu_get_ppaace(liodn); + if (!paace) { + pr_debug("Invalid liodn entry\n"); + return -ENOENT; + } + if (subwin) { + paace = pamu_get_spaace(paace, subwin - 1); + if (!paace) + return -ENOENT; + } + set_bf(paace->impl_attr, PAACE_IA_CID, value); + + mb(); + + return 0; +} + +/* Disable a subwindow corresponding to the LIODN */ +int pamu_disable_spaace(int liodn, u32 subwin) +{ + struct paace *paace; + + paace = pamu_get_ppaace(liodn); + if (!paace) { + pr_debug("Invalid liodn entry\n"); + return -ENOENT; + } + if (subwin) { + paace = pamu_get_spaace(paace, subwin - 1); + if (!paace) + return -ENOENT; + set_bf(paace->addr_bitfields, PAACE_AF_V, PAACE_V_INVALID); + } else { + set_bf(paace->addr_bitfields, PAACE_AF_AP, + PAACE_AP_PERMS_DENIED); + } + + mb(); + + return 0; +} + +/** + * pamu_config_paace() - Sets up PPAACE entry for specified liodn + * + * @liodn: Logical IO device number + * @win_addr: starting address of DSA window + * @win-size: size of DSA window + * @omi: Operation mapping index -- if ~omi == 0 then omi not defined + * @rpn: real (true physical) page number + * @stashid: cache stash id for associated cpu -- if ~stashid == 0 then + * stashid not defined + * @snoopid: snoop id for hardware coherency -- if ~snoopid == 0 then + * snoopid not defined + * @subwin_cnt: number of sub-windows + * @prot: window permissions + * + * Returns 0 upon success else error code < 0 returned + */ +int pamu_config_ppaace(int liodn, phys_addr_t win_addr, phys_addr_t win_size, + u32 omi, unsigned long rpn, u32 snoopid, u32 stashid, + u32 subwin_cnt, int prot) +{ + struct paace *ppaace; + unsigned long fspi; + + if ((win_size & (win_size - 1)) || win_size < PAMU_PAGE_SIZE) { + pr_debug("window size too small or not a power of two %pa\n", + &win_size); + return -EINVAL; + } + + if (win_addr & (win_size - 1)) { + pr_debug("window address is not aligned with window size\n"); + return -EINVAL; + } + + ppaace = pamu_get_ppaace(liodn); + if (!ppaace) + return -ENOENT; + + /* window size is 2^(WSE+1) bytes */ + set_bf(ppaace->addr_bitfields, PPAACE_AF_WSE, + map_addrspace_size_to_wse(win_size)); + + pamu_init_ppaace(ppaace); + + ppaace->wbah = win_addr >> (PAMU_PAGE_SHIFT + 20); + set_bf(ppaace->addr_bitfields, PPAACE_AF_WBAL, + (win_addr >> PAMU_PAGE_SHIFT)); + + /* set up operation mapping if it's configured */ + if (omi < OME_NUMBER_ENTRIES) { + set_bf(ppaace->impl_attr, PAACE_IA_OTM, PAACE_OTM_INDEXED); + ppaace->op_encode.index_ot.omi = omi; + } else if (~omi != 0) { + pr_debug("bad operation mapping index: %d\n", omi); + return -EINVAL; + } + + /* configure stash id */ + if (~stashid != 0) + set_bf(ppaace->impl_attr, PAACE_IA_CID, stashid); + + /* configure snoop id */ + if (~snoopid != 0) + ppaace->domain_attr.to_host.snpid = snoopid; + + if (subwin_cnt) { + /* The first entry is in the primary PAACE instead */ + fspi = pamu_get_fspi_and_allocate(subwin_cnt - 1); + if (fspi == ULONG_MAX) { + pr_debug("spaace indexes exhausted\n"); + return -EINVAL; + } + + /* window count is 2^(WCE+1) bytes */ + set_bf(ppaace->impl_attr, PAACE_IA_WCE, + map_subwindow_cnt_to_wce(subwin_cnt)); + set_bf(ppaace->addr_bitfields, PPAACE_AF_MW, 0x1); + ppaace->fspi = fspi; + } else { + set_bf(ppaace->impl_attr, PAACE_IA_ATM, PAACE_ATM_WINDOW_XLATE); + ppaace->twbah = rpn >> 20; + set_bf(ppaace->win_bitfields, PAACE_WIN_TWBAL, rpn); + set_bf(ppaace->addr_bitfields, PAACE_AF_AP, prot); + set_bf(ppaace->impl_attr, PAACE_IA_WCE, 0); + set_bf(ppaace->addr_bitfields, PPAACE_AF_MW, 0); + } + mb(); + + return 0; +} + +/** + * pamu_config_spaace() - Sets up SPAACE entry for specified subwindow + * + * @liodn: Logical IO device number + * @subwin_cnt: number of sub-windows associated with dma-window + * @subwin: subwindow index + * @subwin_size: size of subwindow + * @omi: Operation mapping index + * @rpn: real (true physical) page number + * @snoopid: snoop id for hardware coherency -- if ~snoopid == 0 then + * snoopid not defined + * @stashid: cache stash id for associated cpu + * @enable: enable/disable subwindow after reconfiguration + * @prot: sub window permissions + * + * Returns 0 upon success else error code < 0 returned + */ +int pamu_config_spaace(int liodn, u32 subwin_cnt, u32 subwin, + phys_addr_t subwin_size, u32 omi, unsigned long rpn, + u32 snoopid, u32 stashid, int enable, int prot) +{ + struct paace *paace; + + /* setup sub-windows */ + if (!subwin_cnt) { + pr_debug("Invalid subwindow count\n"); + return -EINVAL; + } + + paace = pamu_get_ppaace(liodn); + if (subwin > 0 && subwin < subwin_cnt && paace) { + paace = pamu_get_spaace(paace, subwin - 1); + + if (paace && !(paace->addr_bitfields & PAACE_V_VALID)) { + pamu_init_spaace(paace); + set_bf(paace->addr_bitfields, SPAACE_AF_LIODN, liodn); + } + } + + if (!paace) { + pr_debug("Invalid liodn entry\n"); + return -ENOENT; + } + + if ((subwin_size & (subwin_size - 1)) || subwin_size < PAMU_PAGE_SIZE) { + pr_debug("subwindow size out of range, or not a power of 2\n"); + return -EINVAL; + } + + if (rpn == ULONG_MAX) { + pr_debug("real page number out of range\n"); + return -EINVAL; + } + + /* window size is 2^(WSE+1) bytes */ + set_bf(paace->win_bitfields, PAACE_WIN_SWSE, + map_addrspace_size_to_wse(subwin_size)); + + set_bf(paace->impl_attr, PAACE_IA_ATM, PAACE_ATM_WINDOW_XLATE); + paace->twbah = rpn >> 20; + set_bf(paace->win_bitfields, PAACE_WIN_TWBAL, rpn); + set_bf(paace->addr_bitfields, PAACE_AF_AP, prot); + + /* configure snoop id */ + if (~snoopid != 0) + paace->domain_attr.to_host.snpid = snoopid; + + /* set up operation mapping if it's configured */ + if (omi < OME_NUMBER_ENTRIES) { + set_bf(paace->impl_attr, PAACE_IA_OTM, PAACE_OTM_INDEXED); + paace->op_encode.index_ot.omi = omi; + } else if (~omi != 0) { + pr_debug("bad operation mapping index: %d\n", omi); + return -EINVAL; + } + + if (~stashid != 0) + set_bf(paace->impl_attr, PAACE_IA_CID, stashid); + + smp_wmb(); + + if (enable) + set_bf(paace->addr_bitfields, PAACE_AF_V, PAACE_V_VALID); + + mb(); + + return 0; +} + +/** + * get_ome_index() - Returns the index in the operation mapping table + * for device. + * @*omi_index: pointer for storing the index value + * + */ +void get_ome_index(u32 *omi_index, struct device *dev) +{ + if (of_device_is_compatible(dev->of_node, "fsl,qman-portal")) + *omi_index = OMI_QMAN; + if (of_device_is_compatible(dev->of_node, "fsl,qman")) + *omi_index = OMI_QMAN_PRIV; +} + +/** + * get_stash_id - Returns stash destination id corresponding to a + * cache type and vcpu. + * @stash_dest_hint: L1, L2 or L3 + * @vcpu: vpcu target for a particular cache type. + * + * Returs stash on success or ~(u32)0 on failure. + * + */ +u32 get_stash_id(u32 stash_dest_hint, u32 vcpu) +{ + const u32 *prop; + struct device_node *node; + u32 cache_level; + int len, found = 0; + int i; + + /* Fastpath, exit early if L3/CPC cache is target for stashing */ + if (stash_dest_hint == PAMU_ATTR_CACHE_L3) { + node = of_find_matching_node(NULL, l3_device_ids); + if (node) { + prop = of_get_property(node, "cache-stash-id", NULL); + if (!prop) { + pr_debug("missing cache-stash-id at %s\n", + node->full_name); + of_node_put(node); + return ~(u32)0; + } + of_node_put(node); + return be32_to_cpup(prop); + } + return ~(u32)0; + } + + for_each_node_by_type(node, "cpu") { + prop = of_get_property(node, "reg", &len); + for (i = 0; i < len / sizeof(u32); i++) { + if (be32_to_cpup(&prop[i]) == vcpu) { + found = 1; + goto found_cpu_node; + } + } + } +found_cpu_node: + + /* find the hwnode that represents the cache */ + for (cache_level = PAMU_ATTR_CACHE_L1; (cache_level < PAMU_ATTR_CACHE_L3) && found; cache_level++) { + if (stash_dest_hint == cache_level) { + prop = of_get_property(node, "cache-stash-id", NULL); + if (!prop) { + pr_debug("missing cache-stash-id at %s\n", + node->full_name); + of_node_put(node); + return ~(u32)0; + } + of_node_put(node); + return be32_to_cpup(prop); + } + + prop = of_get_property(node, "next-level-cache", NULL); + if (!prop) { + pr_debug("can't find next-level-cache at %s\n", + node->full_name); + of_node_put(node); + return ~(u32)0; /* can't traverse any further */ + } + of_node_put(node); + + /* advance to next node in cache hierarchy */ + node = of_find_node_by_phandle(*prop); + if (!node) { + pr_debug("Invalid node for cache hierarchy\n"); + return ~(u32)0; + } + } + + pr_debug("stash dest not found for %d on vcpu %d\n", + stash_dest_hint, vcpu); + return ~(u32)0; +} + +/* Identify if the PAACT table entry belongs to QMAN, BMAN or QMAN Portal */ +#define QMAN_PAACE 1 +#define QMAN_PORTAL_PAACE 2 +#define BMAN_PAACE 3 + +/** + * Setup operation mapping and stash destinations for QMAN and QMAN portal. + * Memory accesses to QMAN and BMAN private memory need not be coherent, so + * clear the PAACE entry coherency attribute for them. + */ +static void __init setup_qbman_paace(struct paace *ppaace, int paace_type) +{ + switch (paace_type) { + case QMAN_PAACE: + set_bf(ppaace->impl_attr, PAACE_IA_OTM, PAACE_OTM_INDEXED); + ppaace->op_encode.index_ot.omi = OMI_QMAN_PRIV; + /* setup QMAN Private data stashing for the L3 cache */ + set_bf(ppaace->impl_attr, PAACE_IA_CID, get_stash_id(PAMU_ATTR_CACHE_L3, 0)); + set_bf(ppaace->domain_attr.to_host.coherency_required, PAACE_DA_HOST_CR, + 0); + break; + case QMAN_PORTAL_PAACE: + set_bf(ppaace->impl_attr, PAACE_IA_OTM, PAACE_OTM_INDEXED); + ppaace->op_encode.index_ot.omi = OMI_QMAN; + /* Set DQRR and Frame stashing for the L3 cache */ + set_bf(ppaace->impl_attr, PAACE_IA_CID, get_stash_id(PAMU_ATTR_CACHE_L3, 0)); + break; + case BMAN_PAACE: + set_bf(ppaace->domain_attr.to_host.coherency_required, PAACE_DA_HOST_CR, + 0); + break; + } +} + +/** + * Setup the operation mapping table for various devices. This is a static + * table where each table index corresponds to a particular device. PAMU uses + * this table to translate device transaction to appropriate corenet + * transaction. + */ +static void __init setup_omt(struct ome *omt) +{ + struct ome *ome; + + /* Configure OMI_QMAN */ + ome = &omt[OMI_QMAN]; + + ome->moe[IOE_READ_IDX] = EOE_VALID | EOE_READ; + ome->moe[IOE_EREAD0_IDX] = EOE_VALID | EOE_RSA; + ome->moe[IOE_WRITE_IDX] = EOE_VALID | EOE_WRITE; + ome->moe[IOE_EWRITE0_IDX] = EOE_VALID | EOE_WWSAO; + + ome->moe[IOE_DIRECT0_IDX] = EOE_VALID | EOE_LDEC; + ome->moe[IOE_DIRECT1_IDX] = EOE_VALID | EOE_LDECPE; + + /* Configure OMI_FMAN */ + ome = &omt[OMI_FMAN]; + ome->moe[IOE_READ_IDX] = EOE_VALID | EOE_READI; + ome->moe[IOE_WRITE_IDX] = EOE_VALID | EOE_WRITE; + + /* Configure OMI_QMAN private */ + ome = &omt[OMI_QMAN_PRIV]; + ome->moe[IOE_READ_IDX] = EOE_VALID | EOE_READ; + ome->moe[IOE_WRITE_IDX] = EOE_VALID | EOE_WRITE; + ome->moe[IOE_EREAD0_IDX] = EOE_VALID | EOE_RSA; + ome->moe[IOE_EWRITE0_IDX] = EOE_VALID | EOE_WWSA; + + /* Configure OMI_CAAM */ + ome = &omt[OMI_CAAM]; + ome->moe[IOE_READ_IDX] = EOE_VALID | EOE_READI; + ome->moe[IOE_WRITE_IDX] = EOE_VALID | EOE_WRITE; +} + +/* + * Get the maximum number of PAACT table entries + * and subwindows supported by PAMU + */ +static void __init get_pamu_cap_values(unsigned long pamu_reg_base) +{ + u32 pc_val; + + pc_val = in_be32((u32 *)(pamu_reg_base + PAMU_PC3)); + /* Maximum number of subwindows per liodn */ + max_subwindow_count = 1 << (1 + PAMU_PC3_MWCE(pc_val)); +} + +/* Setup PAMU registers pointing to PAACT, SPAACT and OMT */ +static int __init setup_one_pamu(unsigned long pamu_reg_base, unsigned long pamu_reg_size, + phys_addr_t ppaact_phys, phys_addr_t spaact_phys, + phys_addr_t omt_phys) +{ + u32 *pc; + struct pamu_mmap_regs *pamu_regs; + + pc = (u32 *) (pamu_reg_base + PAMU_PC); + pamu_regs = (struct pamu_mmap_regs *) + (pamu_reg_base + PAMU_MMAP_REGS_BASE); + + /* set up pointers to corenet control blocks */ + + out_be32(&pamu_regs->ppbah, upper_32_bits(ppaact_phys)); + out_be32(&pamu_regs->ppbal, lower_32_bits(ppaact_phys)); + ppaact_phys = ppaact_phys + PAACT_SIZE; + out_be32(&pamu_regs->pplah, upper_32_bits(ppaact_phys)); + out_be32(&pamu_regs->pplal, lower_32_bits(ppaact_phys)); + + out_be32(&pamu_regs->spbah, upper_32_bits(spaact_phys)); + out_be32(&pamu_regs->spbal, lower_32_bits(spaact_phys)); + spaact_phys = spaact_phys + SPAACT_SIZE; + out_be32(&pamu_regs->splah, upper_32_bits(spaact_phys)); + out_be32(&pamu_regs->splal, lower_32_bits(spaact_phys)); + + out_be32(&pamu_regs->obah, upper_32_bits(omt_phys)); + out_be32(&pamu_regs->obal, lower_32_bits(omt_phys)); + omt_phys = omt_phys + OMT_SIZE; + out_be32(&pamu_regs->olah, upper_32_bits(omt_phys)); + out_be32(&pamu_regs->olal, lower_32_bits(omt_phys)); + + /* + * set PAMU enable bit, + * allow ppaact & omt to be cached + * & enable PAMU access violation interrupts. + */ + + out_be32((u32 *)(pamu_reg_base + PAMU_PICS), + PAMU_ACCESS_VIOLATION_ENABLE); + out_be32(pc, PAMU_PC_PE | PAMU_PC_OCE | PAMU_PC_SPCC | PAMU_PC_PPCC); + return 0; +} + +/* Enable all device LIODNS */ +static void __init setup_liodns(void) +{ + int i, len; + struct paace *ppaace; + struct device_node *node = NULL; + const u32 *prop; + + for_each_node_with_property(node, "fsl,liodn") { + prop = of_get_property(node, "fsl,liodn", &len); + for (i = 0; i < len / sizeof(u32); i++) { + int liodn; + + liodn = be32_to_cpup(&prop[i]); + if (liodn >= PAACE_NUMBER_ENTRIES) { + pr_debug("Invalid LIODN value %d\n", liodn); + continue; + } + ppaace = pamu_get_ppaace(liodn); + pamu_init_ppaace(ppaace); + /* window size is 2^(WSE+1) bytes */ + set_bf(ppaace->addr_bitfields, PPAACE_AF_WSE, 35); + ppaace->wbah = 0; + set_bf(ppaace->addr_bitfields, PPAACE_AF_WBAL, 0); + set_bf(ppaace->impl_attr, PAACE_IA_ATM, + PAACE_ATM_NO_XLATE); + set_bf(ppaace->addr_bitfields, PAACE_AF_AP, + PAACE_AP_PERMS_ALL); + if (of_device_is_compatible(node, "fsl,qman-portal")) + setup_qbman_paace(ppaace, QMAN_PORTAL_PAACE); + if (of_device_is_compatible(node, "fsl,qman")) + setup_qbman_paace(ppaace, QMAN_PAACE); + if (of_device_is_compatible(node, "fsl,bman")) + setup_qbman_paace(ppaace, BMAN_PAACE); + mb(); + pamu_enable_liodn(liodn); + } + } +} + +static irqreturn_t pamu_av_isr(int irq, void *arg) +{ + struct pamu_isr_data *data = arg; + phys_addr_t phys; + unsigned int i, j, ret; + + pr_emerg("access violation interrupt\n"); + + for (i = 0; i < data->count; i++) { + void __iomem *p = data->pamu_reg_base + i * PAMU_OFFSET; + u32 pics = in_be32(p + PAMU_PICS); + + if (pics & PAMU_ACCESS_VIOLATION_STAT) { + u32 avs1 = in_be32(p + PAMU_AVS1); + struct paace *paace; + + pr_emerg("POES1=%08x\n", in_be32(p + PAMU_POES1)); + pr_emerg("POES2=%08x\n", in_be32(p + PAMU_POES2)); + pr_emerg("AVS1=%08x\n", avs1); + pr_emerg("AVS2=%08x\n", in_be32(p + PAMU_AVS2)); + pr_emerg("AVA=%016llx\n", + make64(in_be32(p + PAMU_AVAH), + in_be32(p + PAMU_AVAL))); + pr_emerg("UDAD=%08x\n", in_be32(p + PAMU_UDAD)); + pr_emerg("POEA=%016llx\n", + make64(in_be32(p + PAMU_POEAH), + in_be32(p + PAMU_POEAL))); + + phys = make64(in_be32(p + PAMU_POEAH), + in_be32(p + PAMU_POEAL)); + + /* Assume that POEA points to a PAACE */ + if (phys) { + u32 *paace = phys_to_virt(phys); + + /* Only the first four words are relevant */ + for (j = 0; j < 4; j++) + pr_emerg("PAACE[%u]=%08x\n", + j, in_be32(paace + j)); + } + + /* clear access violation condition */ + out_be32(p + PAMU_AVS1, avs1 & PAMU_AV_MASK); + paace = pamu_get_ppaace(avs1 >> PAMU_AVS1_LIODN_SHIFT); + BUG_ON(!paace); + /* check if we got a violation for a disabled LIODN */ + if (!get_bf(paace->addr_bitfields, PAACE_AF_V)) { + /* + * As per hardware erratum A-003638, access + * violation can be reported for a disabled + * LIODN. If we hit that condition, disable + * access violation reporting. + */ + pics &= ~PAMU_ACCESS_VIOLATION_ENABLE; + } else { + /* Disable the LIODN */ + ret = pamu_disable_liodn(avs1 >> PAMU_AVS1_LIODN_SHIFT); + BUG_ON(ret); + pr_emerg("Disabling liodn %x\n", + avs1 >> PAMU_AVS1_LIODN_SHIFT); + } + out_be32((p + PAMU_PICS), pics); + } + } + + return IRQ_HANDLED; +} + +#define LAWAR_EN 0x80000000 +#define LAWAR_TARGET_MASK 0x0FF00000 +#define LAWAR_TARGET_SHIFT 20 +#define LAWAR_SIZE_MASK 0x0000003F +#define LAWAR_CSDID_MASK 0x000FF000 +#define LAWAR_CSDID_SHIFT 12 + +#define LAW_SIZE_4K 0xb + +struct ccsr_law { + u32 lawbarh; /* LAWn base address high */ + u32 lawbarl; /* LAWn base address low */ + u32 lawar; /* LAWn attributes */ + u32 reserved; +}; + +/* + * Create a coherence subdomain for a given memory block. + */ +static int __init create_csd(phys_addr_t phys, size_t size, u32 csd_port_id) +{ + struct device_node *np; + const __be32 *iprop; + void __iomem *lac = NULL; /* Local Access Control registers */ + struct ccsr_law __iomem *law; + void __iomem *ccm = NULL; + u32 __iomem *csdids; + unsigned int i, num_laws, num_csds; + u32 law_target = 0; + u32 csd_id = 0; + int ret = 0; + + np = of_find_compatible_node(NULL, NULL, "fsl,corenet-law"); + if (!np) + return -ENODEV; + + iprop = of_get_property(np, "fsl,num-laws", NULL); + if (!iprop) { + ret = -ENODEV; + goto error; + } + + num_laws = be32_to_cpup(iprop); + if (!num_laws) { + ret = -ENODEV; + goto error; + } + + lac = of_iomap(np, 0); + if (!lac) { + ret = -ENODEV; + goto error; + } + + /* LAW registers are at offset 0xC00 */ + law = lac + 0xC00; + + of_node_put(np); + + np = of_find_compatible_node(NULL, NULL, "fsl,corenet-cf"); + if (!np) { + ret = -ENODEV; + goto error; + } + + iprop = of_get_property(np, "fsl,ccf-num-csdids", NULL); + if (!iprop) { + ret = -ENODEV; + goto error; + } + + num_csds = be32_to_cpup(iprop); + if (!num_csds) { + ret = -ENODEV; + goto error; + } + + ccm = of_iomap(np, 0); + if (!ccm) { + ret = -ENOMEM; + goto error; + } + + /* The undocumented CSDID registers are at offset 0x600 */ + csdids = ccm + 0x600; + + of_node_put(np); + np = NULL; + + /* Find an unused coherence subdomain ID */ + for (csd_id = 0; csd_id < num_csds; csd_id++) { + if (!csdids[csd_id]) + break; + } + + /* Store the Port ID in the (undocumented) proper CIDMRxx register */ + csdids[csd_id] = csd_port_id; + + /* Find the DDR LAW that maps to our buffer. */ + for (i = 0; i < num_laws; i++) { + if (law[i].lawar & LAWAR_EN) { + phys_addr_t law_start, law_end; + + law_start = make64(law[i].lawbarh, law[i].lawbarl); + law_end = law_start + + (2ULL << (law[i].lawar & LAWAR_SIZE_MASK)); + + if (law_start <= phys && phys < law_end) { + law_target = law[i].lawar & LAWAR_TARGET_MASK; + break; + } + } + } + + if (i == 0 || i == num_laws) { + /* This should never happen */ + ret = -ENOENT; + goto error; + } + + /* Find a free LAW entry */ + while (law[--i].lawar & LAWAR_EN) { + if (i == 0) { + /* No higher priority LAW slots available */ + ret = -ENOENT; + goto error; + } + } + + law[i].lawbarh = upper_32_bits(phys); + law[i].lawbarl = lower_32_bits(phys); + wmb(); + law[i].lawar = LAWAR_EN | law_target | (csd_id << LAWAR_CSDID_SHIFT) | + (LAW_SIZE_4K + get_order(size)); + wmb(); + +error: + if (ccm) + iounmap(ccm); + + if (lac) + iounmap(lac); + + if (np) + of_node_put(np); + + return ret; +} + +/* + * Table of SVRs and the corresponding PORT_ID values. Port ID corresponds to a + * bit map of snoopers for a given range of memory mapped by a LAW. + * + * All future CoreNet-enabled SOCs will have this erratum(A-004510) fixed, so this + * table should never need to be updated. SVRs are guaranteed to be unique, so + * there is no worry that a future SOC will inadvertently have one of these + * values. + */ +static const struct { + u32 svr; + u32 port_id; +} port_id_map[] __initconst = { + {(SVR_P2040 << 8) | 0x10, 0xFF000000}, /* P2040 1.0 */ + {(SVR_P2040 << 8) | 0x11, 0xFF000000}, /* P2040 1.1 */ + {(SVR_P2041 << 8) | 0x10, 0xFF000000}, /* P2041 1.0 */ + {(SVR_P2041 << 8) | 0x11, 0xFF000000}, /* P2041 1.1 */ + {(SVR_P3041 << 8) | 0x10, 0xFF000000}, /* P3041 1.0 */ + {(SVR_P3041 << 8) | 0x11, 0xFF000000}, /* P3041 1.1 */ + {(SVR_P4040 << 8) | 0x20, 0xFFF80000}, /* P4040 2.0 */ + {(SVR_P4080 << 8) | 0x20, 0xFFF80000}, /* P4080 2.0 */ + {(SVR_P5010 << 8) | 0x10, 0xFC000000}, /* P5010 1.0 */ + {(SVR_P5010 << 8) | 0x20, 0xFC000000}, /* P5010 2.0 */ + {(SVR_P5020 << 8) | 0x10, 0xFC000000}, /* P5020 1.0 */ + {(SVR_P5021 << 8) | 0x10, 0xFF800000}, /* P5021 1.0 */ + {(SVR_P5040 << 8) | 0x10, 0xFF800000}, /* P5040 1.0 */ +}; + +#define SVR_SECURITY 0x80000 /* The Security (E) bit */ + +static int __init fsl_pamu_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + void __iomem *pamu_regs = NULL; + struct ccsr_guts __iomem *guts_regs = NULL; + u32 pamubypenr, pamu_counter; + unsigned long pamu_reg_off; + unsigned long pamu_reg_base; + struct pamu_isr_data *data = NULL; + struct device_node *guts_node; + u64 size; + struct page *p; + int ret = 0; + int irq; + phys_addr_t ppaact_phys; + phys_addr_t spaact_phys; + phys_addr_t omt_phys; + size_t mem_size = 0; + unsigned int order = 0; + u32 csd_port_id = 0; + unsigned i; + /* + * enumerate all PAMUs and allocate and setup PAMU tables + * for each of them, + * NOTE : All PAMUs share the same LIODN tables. + */ + + pamu_regs = of_iomap(dev->of_node, 0); + if (!pamu_regs) { + dev_err(dev, "ioremap of PAMU node failed\n"); + return -ENOMEM; + } + of_get_address(dev->of_node, 0, &size, NULL); + + irq = irq_of_parse_and_map(dev->of_node, 0); + if (irq == NO_IRQ) { + dev_warn(dev, "no interrupts listed in PAMU node\n"); + goto error; + } + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) { + ret = -ENOMEM; + goto error; + } + data->pamu_reg_base = pamu_regs; + data->count = size / PAMU_OFFSET; + + /* The ISR needs access to the regs, so we won't iounmap them */ + ret = request_irq(irq, pamu_av_isr, 0, "pamu", data); + if (ret < 0) { + dev_err(dev, "error %i installing ISR for irq %i\n", ret, irq); + goto error; + } + + guts_node = of_find_matching_node(NULL, guts_device_ids); + if (!guts_node) { + dev_err(dev, "could not find GUTS node %s\n", + dev->of_node->full_name); + ret = -ENODEV; + goto error; + } + + guts_regs = of_iomap(guts_node, 0); + of_node_put(guts_node); + if (!guts_regs) { + dev_err(dev, "ioremap of GUTS node failed\n"); + ret = -ENODEV; + goto error; + } + + /* read in the PAMU capability registers */ + get_pamu_cap_values((unsigned long)pamu_regs); + /* + * To simplify the allocation of a coherency domain, we allocate the + * PAACT and the OMT in the same memory buffer. Unfortunately, this + * wastes more memory compared to allocating the buffers separately. + */ + /* Determine how much memory we need */ + mem_size = (PAGE_SIZE << get_order(PAACT_SIZE)) + + (PAGE_SIZE << get_order(SPAACT_SIZE)) + + (PAGE_SIZE << get_order(OMT_SIZE)); + order = get_order(mem_size); + + p = alloc_pages(GFP_KERNEL | __GFP_ZERO, order); + if (!p) { + dev_err(dev, "unable to allocate PAACT/SPAACT/OMT block\n"); + ret = -ENOMEM; + goto error; + } + + ppaact = page_address(p); + ppaact_phys = page_to_phys(p); + + /* Make sure the memory is naturally aligned */ + if (ppaact_phys & ((PAGE_SIZE << order) - 1)) { + dev_err(dev, "PAACT/OMT block is unaligned\n"); + ret = -ENOMEM; + goto error; + } + + spaact = (void *)ppaact + (PAGE_SIZE << get_order(PAACT_SIZE)); + omt = (void *)spaact + (PAGE_SIZE << get_order(SPAACT_SIZE)); + + dev_dbg(dev, "ppaact virt=%p phys=%pa\n", ppaact, &ppaact_phys); + + /* Check to see if we need to implement the work-around on this SOC */ + + /* Determine the Port ID for our coherence subdomain */ + for (i = 0; i < ARRAY_SIZE(port_id_map); i++) { + if (port_id_map[i].svr == (mfspr(SPRN_SVR) & ~SVR_SECURITY)) { + csd_port_id = port_id_map[i].port_id; + dev_dbg(dev, "found matching SVR %08x\n", + port_id_map[i].svr); + break; + } + } + + if (csd_port_id) { + dev_dbg(dev, "creating coherency subdomain at address %pa, size %zu, port id 0x%08x", + &ppaact_phys, mem_size, csd_port_id); + + ret = create_csd(ppaact_phys, mem_size, csd_port_id); + if (ret) { + dev_err(dev, "could not create coherence subdomain\n"); + return ret; + } + } + + spaact_phys = virt_to_phys(spaact); + omt_phys = virt_to_phys(omt); + + spaace_pool = gen_pool_create(ilog2(sizeof(struct paace)), -1); + if (!spaace_pool) { + ret = -ENOMEM; + dev_err(dev, "Failed to allocate spaace gen pool\n"); + goto error; + } + + ret = gen_pool_add(spaace_pool, (unsigned long)spaact, SPAACT_SIZE, -1); + if (ret) + goto error_genpool; + + pamubypenr = in_be32(&guts_regs->pamubypenr); + + for (pamu_reg_off = 0, pamu_counter = 0x80000000; pamu_reg_off < size; + pamu_reg_off += PAMU_OFFSET, pamu_counter >>= 1) { + + pamu_reg_base = (unsigned long)pamu_regs + pamu_reg_off; + setup_one_pamu(pamu_reg_base, pamu_reg_off, ppaact_phys, + spaact_phys, omt_phys); + /* Disable PAMU bypass for this PAMU */ + pamubypenr &= ~pamu_counter; + } + + setup_omt(omt); + + /* Enable all relevant PAMU(s) */ + out_be32(&guts_regs->pamubypenr, pamubypenr); + + iounmap(guts_regs); + + /* Enable DMA for the LIODNs in the device tree */ + + setup_liodns(); + + return 0; + +error_genpool: + gen_pool_destroy(spaace_pool); + +error: + if (irq != NO_IRQ) + free_irq(irq, data); + + if (data) { + memset(data, 0, sizeof(struct pamu_isr_data)); + kfree(data); + } + + if (pamu_regs) + iounmap(pamu_regs); + + if (guts_regs) + iounmap(guts_regs); + + if (ppaact) + free_pages((unsigned long)ppaact, order); + + ppaact = NULL; + + return ret; +} + +static struct platform_driver fsl_of_pamu_driver __initdata = { + .driver = { + .name = "fsl-of-pamu", + }, + .probe = fsl_pamu_probe, +}; + +static __init int fsl_pamu_init(void) +{ + struct platform_device *pdev = NULL; + struct device_node *np; + int ret; + + /* + * The normal OF process calls the probe function at some + * indeterminate later time, after most drivers have loaded. This is + * too late for us, because PAMU clients (like the Qman driver) + * depend on PAMU being initialized early. + * + * So instead, we "manually" call our probe function by creating the + * platform devices ourselves. + */ + + /* + * We assume that there is only one PAMU node in the device tree. A + * single PAMU node represents all of the PAMU devices in the SOC + * already. Everything else already makes that assumption, and the + * binding for the PAMU nodes doesn't allow for any parent-child + * relationships anyway. In other words, support for more than one + * PAMU node would require significant changes to a lot of code. + */ + + np = of_find_compatible_node(NULL, NULL, "fsl,pamu"); + if (!np) { + pr_err("could not find a PAMU node\n"); + return -ENODEV; + } + + ret = platform_driver_register(&fsl_of_pamu_driver); + if (ret) { + pr_err("could not register driver (err=%i)\n", ret); + goto error_driver_register; + } + + pdev = platform_device_alloc("fsl-of-pamu", 0); + if (!pdev) { + pr_err("could not allocate device %s\n", + np->full_name); + ret = -ENOMEM; + goto error_device_alloc; + } + pdev->dev.of_node = of_node_get(np); + + ret = pamu_domain_init(); + if (ret) + goto error_device_add; + + ret = platform_device_add(pdev); + if (ret) { + pr_err("could not add device %s (err=%i)\n", + np->full_name, ret); + goto error_device_add; + } + + return 0; + +error_device_add: + of_node_put(pdev->dev.of_node); + pdev->dev.of_node = NULL; + + platform_device_put(pdev); + +error_device_alloc: + platform_driver_unregister(&fsl_of_pamu_driver); + +error_driver_register: + of_node_put(np); + + return ret; +} +arch_initcall(fsl_pamu_init); diff --git a/drivers/iommu/fsl_pamu.h b/drivers/iommu/fsl_pamu.h new file mode 100644 index 000000000..aab723f91 --- /dev/null +++ b/drivers/iommu/fsl_pamu.h @@ -0,0 +1,411 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright (C) 2013 Freescale Semiconductor, Inc. + * + */ + +#ifndef __FSL_PAMU_H +#define __FSL_PAMU_H + +#include <linux/iommu.h> + +#include <asm/fsl_pamu_stash.h> + +/* Bit Field macros + * v = bit field variable; m = mask, m##_SHIFT = shift, x = value to load + */ +#define set_bf(v, m, x) (v = ((v) & ~(m)) | (((x) << m##_SHIFT) & (m))) +#define get_bf(v, m) (((v) & (m)) >> m##_SHIFT) + +/* PAMU CCSR space */ +#define PAMU_PGC 0x00000000 /* Allows all peripheral accesses */ +#define PAMU_PE 0x40000000 /* enable PAMU */ + +/* PAMU_OFFSET to the next pamu space in ccsr */ +#define PAMU_OFFSET 0x1000 + +#define PAMU_MMAP_REGS_BASE 0 + +struct pamu_mmap_regs { + u32 ppbah; + u32 ppbal; + u32 pplah; + u32 pplal; + u32 spbah; + u32 spbal; + u32 splah; + u32 splal; + u32 obah; + u32 obal; + u32 olah; + u32 olal; +}; + +/* PAMU Error Registers */ +#define PAMU_POES1 0x0040 +#define PAMU_POES2 0x0044 +#define PAMU_POEAH 0x0048 +#define PAMU_POEAL 0x004C +#define PAMU_AVS1 0x0050 +#define PAMU_AVS1_AV 0x1 +#define PAMU_AVS1_OTV 0x6 +#define PAMU_AVS1_APV 0x78 +#define PAMU_AVS1_WAV 0x380 +#define PAMU_AVS1_LAV 0x1c00 +#define PAMU_AVS1_GCV 0x2000 +#define PAMU_AVS1_PDV 0x4000 +#define PAMU_AV_MASK (PAMU_AVS1_AV | PAMU_AVS1_OTV | PAMU_AVS1_APV | PAMU_AVS1_WAV \ + | PAMU_AVS1_LAV | PAMU_AVS1_GCV | PAMU_AVS1_PDV) +#define PAMU_AVS1_LIODN_SHIFT 16 +#define PAMU_LAV_LIODN_NOT_IN_PPAACT 0x400 + +#define PAMU_AVS2 0x0054 +#define PAMU_AVAH 0x0058 +#define PAMU_AVAL 0x005C +#define PAMU_EECTL 0x0060 +#define PAMU_EEDIS 0x0064 +#define PAMU_EEINTEN 0x0068 +#define PAMU_EEDET 0x006C +#define PAMU_EEATTR 0x0070 +#define PAMU_EEAHI 0x0074 +#define PAMU_EEALO 0x0078 +#define PAMU_EEDHI 0X007C +#define PAMU_EEDLO 0x0080 +#define PAMU_EECC 0x0084 +#define PAMU_UDAD 0x0090 + +/* PAMU Revision Registers */ +#define PAMU_PR1 0x0BF8 +#define PAMU_PR2 0x0BFC + +/* PAMU version mask */ +#define PAMU_PR1_MASK 0xffff + +/* PAMU Capabilities Registers */ +#define PAMU_PC1 0x0C00 +#define PAMU_PC2 0x0C04 +#define PAMU_PC3 0x0C08 +#define PAMU_PC4 0x0C0C + +/* PAMU Control Register */ +#define PAMU_PC 0x0C10 + +/* PAMU control defs */ +#define PAMU_CONTROL 0x0C10 +#define PAMU_PC_PGC 0x80000000 /* PAMU gate closed bit */ +#define PAMU_PC_PE 0x40000000 /* PAMU enable bit */ +#define PAMU_PC_SPCC 0x00000010 /* sPAACE cache enable */ +#define PAMU_PC_PPCC 0x00000001 /* pPAACE cache enable */ +#define PAMU_PC_OCE 0x00001000 /* OMT cache enable */ + +#define PAMU_PFA1 0x0C14 +#define PAMU_PFA2 0x0C18 + +#define PAMU_PC2_MLIODN(X) ((X) >> 16) +#define PAMU_PC3_MWCE(X) (((X) >> 21) & 0xf) + +/* PAMU Interrupt control and Status Register */ +#define PAMU_PICS 0x0C1C +#define PAMU_ACCESS_VIOLATION_STAT 0x8 +#define PAMU_ACCESS_VIOLATION_ENABLE 0x4 + +/* PAMU Debug Registers */ +#define PAMU_PD1 0x0F00 +#define PAMU_PD2 0x0F04 +#define PAMU_PD3 0x0F08 +#define PAMU_PD4 0x0F0C + +#define PAACE_AP_PERMS_DENIED 0x0 +#define PAACE_AP_PERMS_QUERY 0x1 +#define PAACE_AP_PERMS_UPDATE 0x2 +#define PAACE_AP_PERMS_ALL 0x3 + +#define PAACE_DD_TO_HOST 0x0 +#define PAACE_DD_TO_IO 0x1 +#define PAACE_PT_PRIMARY 0x0 +#define PAACE_PT_SECONDARY 0x1 +#define PAACE_V_INVALID 0x0 +#define PAACE_V_VALID 0x1 +#define PAACE_MW_SUBWINDOWS 0x1 + +#define PAACE_WSE_4K 0xB +#define PAACE_WSE_8K 0xC +#define PAACE_WSE_16K 0xD +#define PAACE_WSE_32K 0xE +#define PAACE_WSE_64K 0xF +#define PAACE_WSE_128K 0x10 +#define PAACE_WSE_256K 0x11 +#define PAACE_WSE_512K 0x12 +#define PAACE_WSE_1M 0x13 +#define PAACE_WSE_2M 0x14 +#define PAACE_WSE_4M 0x15 +#define PAACE_WSE_8M 0x16 +#define PAACE_WSE_16M 0x17 +#define PAACE_WSE_32M 0x18 +#define PAACE_WSE_64M 0x19 +#define PAACE_WSE_128M 0x1A +#define PAACE_WSE_256M 0x1B +#define PAACE_WSE_512M 0x1C +#define PAACE_WSE_1G 0x1D +#define PAACE_WSE_2G 0x1E +#define PAACE_WSE_4G 0x1F + +#define PAACE_DID_PCI_EXPRESS_1 0x00 +#define PAACE_DID_PCI_EXPRESS_2 0x01 +#define PAACE_DID_PCI_EXPRESS_3 0x02 +#define PAACE_DID_PCI_EXPRESS_4 0x03 +#define PAACE_DID_LOCAL_BUS 0x04 +#define PAACE_DID_SRIO 0x0C +#define PAACE_DID_MEM_1 0x10 +#define PAACE_DID_MEM_2 0x11 +#define PAACE_DID_MEM_3 0x12 +#define PAACE_DID_MEM_4 0x13 +#define PAACE_DID_MEM_1_2 0x14 +#define PAACE_DID_MEM_3_4 0x15 +#define PAACE_DID_MEM_1_4 0x16 +#define PAACE_DID_BM_SW_PORTAL 0x18 +#define PAACE_DID_PAMU 0x1C +#define PAACE_DID_CAAM 0x21 +#define PAACE_DID_QM_SW_PORTAL 0x3C +#define PAACE_DID_CORE0_INST 0x80 +#define PAACE_DID_CORE0_DATA 0x81 +#define PAACE_DID_CORE1_INST 0x82 +#define PAACE_DID_CORE1_DATA 0x83 +#define PAACE_DID_CORE2_INST 0x84 +#define PAACE_DID_CORE2_DATA 0x85 +#define PAACE_DID_CORE3_INST 0x86 +#define PAACE_DID_CORE3_DATA 0x87 +#define PAACE_DID_CORE4_INST 0x88 +#define PAACE_DID_CORE4_DATA 0x89 +#define PAACE_DID_CORE5_INST 0x8A +#define PAACE_DID_CORE5_DATA 0x8B +#define PAACE_DID_CORE6_INST 0x8C +#define PAACE_DID_CORE6_DATA 0x8D +#define PAACE_DID_CORE7_INST 0x8E +#define PAACE_DID_CORE7_DATA 0x8F +#define PAACE_DID_BROADCAST 0xFF + +#define PAACE_ATM_NO_XLATE 0x00 +#define PAACE_ATM_WINDOW_XLATE 0x01 +#define PAACE_ATM_PAGE_XLATE 0x02 +#define PAACE_ATM_WIN_PG_XLATE (PAACE_ATM_WINDOW_XLATE | PAACE_ATM_PAGE_XLATE) +#define PAACE_OTM_NO_XLATE 0x00 +#define PAACE_OTM_IMMEDIATE 0x01 +#define PAACE_OTM_INDEXED 0x02 +#define PAACE_OTM_RESERVED 0x03 + +#define PAACE_M_COHERENCE_REQ 0x01 + +#define PAACE_PID_0 0x0 +#define PAACE_PID_1 0x1 +#define PAACE_PID_2 0x2 +#define PAACE_PID_3 0x3 +#define PAACE_PID_4 0x4 +#define PAACE_PID_5 0x5 +#define PAACE_PID_6 0x6 +#define PAACE_PID_7 0x7 + +#define PAACE_TCEF_FORMAT0_8B 0x00 +#define PAACE_TCEF_FORMAT1_RSVD 0x01 +/* + * Hard coded value for the PAACT size to accommodate + * maximum LIODN value generated by u-boot. + */ +#define PAACE_NUMBER_ENTRIES 0x500 +/* Hard coded value for the SPAACT size */ +#define SPAACE_NUMBER_ENTRIES 0x800 + +#define OME_NUMBER_ENTRIES 16 + +/* PAACE Bit Field Defines */ +#define PPAACE_AF_WBAL 0xfffff000 +#define PPAACE_AF_WBAL_SHIFT 12 +#define PPAACE_AF_WSE 0x00000fc0 +#define PPAACE_AF_WSE_SHIFT 6 +#define PPAACE_AF_MW 0x00000020 +#define PPAACE_AF_MW_SHIFT 5 + +#define SPAACE_AF_LIODN 0xffff0000 +#define SPAACE_AF_LIODN_SHIFT 16 + +#define PAACE_AF_AP 0x00000018 +#define PAACE_AF_AP_SHIFT 3 +#define PAACE_AF_DD 0x00000004 +#define PAACE_AF_DD_SHIFT 2 +#define PAACE_AF_PT 0x00000002 +#define PAACE_AF_PT_SHIFT 1 +#define PAACE_AF_V 0x00000001 +#define PAACE_AF_V_SHIFT 0 + +#define PAACE_DA_HOST_CR 0x80 +#define PAACE_DA_HOST_CR_SHIFT 7 + +#define PAACE_IA_CID 0x00FF0000 +#define PAACE_IA_CID_SHIFT 16 +#define PAACE_IA_WCE 0x000000F0 +#define PAACE_IA_WCE_SHIFT 4 +#define PAACE_IA_ATM 0x0000000C +#define PAACE_IA_ATM_SHIFT 2 +#define PAACE_IA_OTM 0x00000003 +#define PAACE_IA_OTM_SHIFT 0 + +#define PAACE_WIN_TWBAL 0xfffff000 +#define PAACE_WIN_TWBAL_SHIFT 12 +#define PAACE_WIN_SWSE 0x00000fc0 +#define PAACE_WIN_SWSE_SHIFT 6 + +/* PAMU Data Structures */ +/* primary / secondary paact structure */ +struct paace { + /* PAACE Offset 0x00 */ + u32 wbah; /* only valid for Primary PAACE */ + u32 addr_bitfields; /* See P/S PAACE_AF_* */ + + /* PAACE Offset 0x08 */ + /* Interpretation of first 32 bits dependent on DD above */ + union { + struct { + /* Destination ID, see PAACE_DID_* defines */ + u8 did; + /* Partition ID */ + u8 pid; + /* Snoop ID */ + u8 snpid; + /* coherency_required : 1 reserved : 7 */ + u8 coherency_required; /* See PAACE_DA_* */ + } to_host; + struct { + /* Destination ID, see PAACE_DID_* defines */ + u8 did; + u8 reserved1; + u16 reserved2; + } to_io; + } domain_attr; + + /* Implementation attributes + window count + address & operation translation modes */ + u32 impl_attr; /* See PAACE_IA_* */ + + /* PAACE Offset 0x10 */ + /* Translated window base address */ + u32 twbah; + u32 win_bitfields; /* See PAACE_WIN_* */ + + /* PAACE Offset 0x18 */ + /* first secondary paace entry */ + u32 fspi; /* only valid for Primary PAACE */ + union { + struct { + u8 ioea; + u8 moea; + u8 ioeb; + u8 moeb; + } immed_ot; + struct { + u16 reserved; + u16 omi; + } index_ot; + } op_encode; + + /* PAACE Offsets 0x20-0x38 */ + u32 reserved[8]; /* not currently implemented */ +}; + +/* OME : Operation mapping entry + * MOE : Mapped Operation Encodings + * The operation mapping table is table containing operation mapping entries (OME). + * The index of a particular OME is programmed in the PAACE entry for translation + * in bound I/O operations corresponding to an LIODN. The OMT is used for translation + * specifically in case of the indexed translation mode. Each OME contains a 128 + * byte mapped operation encoding (MOE), where each byte represents an MOE. + */ +#define NUM_MOE 128 +struct ome { + u8 moe[NUM_MOE]; +} __packed; + +#define PAACT_SIZE (sizeof(struct paace) * PAACE_NUMBER_ENTRIES) +#define SPAACT_SIZE (sizeof(struct paace) * SPAACE_NUMBER_ENTRIES) +#define OMT_SIZE (sizeof(struct ome) * OME_NUMBER_ENTRIES) + +#define PAMU_PAGE_SHIFT 12 +#define PAMU_PAGE_SIZE 4096ULL + +#define IOE_READ 0x00 +#define IOE_READ_IDX 0x00 +#define IOE_WRITE 0x81 +#define IOE_WRITE_IDX 0x01 +#define IOE_EREAD0 0x82 /* Enhanced read type 0 */ +#define IOE_EREAD0_IDX 0x02 /* Enhanced read type 0 */ +#define IOE_EWRITE0 0x83 /* Enhanced write type 0 */ +#define IOE_EWRITE0_IDX 0x03 /* Enhanced write type 0 */ +#define IOE_DIRECT0 0x84 /* Directive type 0 */ +#define IOE_DIRECT0_IDX 0x04 /* Directive type 0 */ +#define IOE_EREAD1 0x85 /* Enhanced read type 1 */ +#define IOE_EREAD1_IDX 0x05 /* Enhanced read type 1 */ +#define IOE_EWRITE1 0x86 /* Enhanced write type 1 */ +#define IOE_EWRITE1_IDX 0x06 /* Enhanced write type 1 */ +#define IOE_DIRECT1 0x87 /* Directive type 1 */ +#define IOE_DIRECT1_IDX 0x07 /* Directive type 1 */ +#define IOE_RAC 0x8c /* Read with Atomic clear */ +#define IOE_RAC_IDX 0x0c /* Read with Atomic clear */ +#define IOE_RAS 0x8d /* Read with Atomic set */ +#define IOE_RAS_IDX 0x0d /* Read with Atomic set */ +#define IOE_RAD 0x8e /* Read with Atomic decrement */ +#define IOE_RAD_IDX 0x0e /* Read with Atomic decrement */ +#define IOE_RAI 0x8f /* Read with Atomic increment */ +#define IOE_RAI_IDX 0x0f /* Read with Atomic increment */ + +#define EOE_READ 0x00 +#define EOE_WRITE 0x01 +#define EOE_RAC 0x0c /* Read with Atomic clear */ +#define EOE_RAS 0x0d /* Read with Atomic set */ +#define EOE_RAD 0x0e /* Read with Atomic decrement */ +#define EOE_RAI 0x0f /* Read with Atomic increment */ +#define EOE_LDEC 0x10 /* Load external cache */ +#define EOE_LDECL 0x11 /* Load external cache with stash lock */ +#define EOE_LDECPE 0x12 /* Load external cache with preferred exclusive */ +#define EOE_LDECPEL 0x13 /* Load external cache with preferred exclusive and lock */ +#define EOE_LDECFE 0x14 /* Load external cache with forced exclusive */ +#define EOE_LDECFEL 0x15 /* Load external cache with forced exclusive and lock */ +#define EOE_RSA 0x16 /* Read with stash allocate */ +#define EOE_RSAU 0x17 /* Read with stash allocate and unlock */ +#define EOE_READI 0x18 /* Read with invalidate */ +#define EOE_RWNITC 0x19 /* Read with no intention to cache */ +#define EOE_WCI 0x1a /* Write cache inhibited */ +#define EOE_WWSA 0x1b /* Write with stash allocate */ +#define EOE_WWSAL 0x1c /* Write with stash allocate and lock */ +#define EOE_WWSAO 0x1d /* Write with stash allocate only */ +#define EOE_WWSAOL 0x1e /* Write with stash allocate only and lock */ +#define EOE_VALID 0x80 + +/* Function prototypes */ +int pamu_domain_init(void); +int pamu_enable_liodn(int liodn); +int pamu_disable_liodn(int liodn); +void pamu_free_subwins(int liodn); +int pamu_config_ppaace(int liodn, phys_addr_t win_addr, phys_addr_t win_size, + u32 omi, unsigned long rpn, u32 snoopid, uint32_t stashid, + u32 subwin_cnt, int prot); +int pamu_config_spaace(int liodn, u32 subwin_cnt, u32 subwin_addr, + phys_addr_t subwin_size, u32 omi, unsigned long rpn, + uint32_t snoopid, u32 stashid, int enable, int prot); + +u32 get_stash_id(u32 stash_dest_hint, u32 vcpu); +void get_ome_index(u32 *omi_index, struct device *dev); +int pamu_update_paace_stash(int liodn, u32 subwin, u32 value); +int pamu_disable_spaace(int liodn, u32 subwin); +u32 pamu_get_max_subwin_cnt(void); + +#endif /* __FSL_PAMU_H */ diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c new file mode 100644 index 000000000..1d452930c --- /dev/null +++ b/drivers/iommu/fsl_pamu_domain.c @@ -0,0 +1,1089 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright (C) 2013 Freescale Semiconductor, Inc. + * Author: Varun Sethi <varun.sethi@freescale.com> + * + */ + +#define pr_fmt(fmt) "fsl-pamu-domain: %s: " fmt, __func__ + +#include "fsl_pamu_domain.h" + +#include <sysdev/fsl_pci.h> + +/* + * Global spinlock that needs to be held while + * configuring PAMU. + */ +static DEFINE_SPINLOCK(iommu_lock); + +static struct kmem_cache *fsl_pamu_domain_cache; +static struct kmem_cache *iommu_devinfo_cache; +static DEFINE_SPINLOCK(device_domain_lock); + +static struct fsl_dma_domain *to_fsl_dma_domain(struct iommu_domain *dom) +{ + return container_of(dom, struct fsl_dma_domain, iommu_domain); +} + +static int __init iommu_init_mempool(void) +{ + fsl_pamu_domain_cache = kmem_cache_create("fsl_pamu_domain", + sizeof(struct fsl_dma_domain), + 0, + SLAB_HWCACHE_ALIGN, + NULL); + if (!fsl_pamu_domain_cache) { + pr_debug("Couldn't create fsl iommu_domain cache\n"); + return -ENOMEM; + } + + iommu_devinfo_cache = kmem_cache_create("iommu_devinfo", + sizeof(struct device_domain_info), + 0, + SLAB_HWCACHE_ALIGN, + NULL); + if (!iommu_devinfo_cache) { + pr_debug("Couldn't create devinfo cache\n"); + kmem_cache_destroy(fsl_pamu_domain_cache); + return -ENOMEM; + } + + return 0; +} + +static phys_addr_t get_phys_addr(struct fsl_dma_domain *dma_domain, dma_addr_t iova) +{ + u32 win_cnt = dma_domain->win_cnt; + struct dma_window *win_ptr = &dma_domain->win_arr[0]; + struct iommu_domain_geometry *geom; + + geom = &dma_domain->iommu_domain.geometry; + + if (!win_cnt || !dma_domain->geom_size) { + pr_debug("Number of windows/geometry not configured for the domain\n"); + return 0; + } + + if (win_cnt > 1) { + u64 subwin_size; + dma_addr_t subwin_iova; + u32 wnd; + + subwin_size = dma_domain->geom_size >> ilog2(win_cnt); + subwin_iova = iova & ~(subwin_size - 1); + wnd = (subwin_iova - geom->aperture_start) >> ilog2(subwin_size); + win_ptr = &dma_domain->win_arr[wnd]; + } + + if (win_ptr->valid) + return win_ptr->paddr + (iova & (win_ptr->size - 1)); + + return 0; +} + +static int map_subwins(int liodn, struct fsl_dma_domain *dma_domain) +{ + struct dma_window *sub_win_ptr = &dma_domain->win_arr[0]; + int i, ret; + unsigned long rpn, flags; + + for (i = 0; i < dma_domain->win_cnt; i++) { + if (sub_win_ptr[i].valid) { + rpn = sub_win_ptr[i].paddr >> PAMU_PAGE_SHIFT; + spin_lock_irqsave(&iommu_lock, flags); + ret = pamu_config_spaace(liodn, dma_domain->win_cnt, i, + sub_win_ptr[i].size, + ~(u32)0, + rpn, + dma_domain->snoop_id, + dma_domain->stash_id, + (i > 0) ? 1 : 0, + sub_win_ptr[i].prot); + spin_unlock_irqrestore(&iommu_lock, flags); + if (ret) { + pr_debug("SPAACE configuration failed for liodn %d\n", + liodn); + return ret; + } + } + } + + return ret; +} + +static int map_win(int liodn, struct fsl_dma_domain *dma_domain) +{ + int ret; + struct dma_window *wnd = &dma_domain->win_arr[0]; + phys_addr_t wnd_addr = dma_domain->iommu_domain.geometry.aperture_start; + unsigned long flags; + + spin_lock_irqsave(&iommu_lock, flags); + ret = pamu_config_ppaace(liodn, wnd_addr, + wnd->size, + ~(u32)0, + wnd->paddr >> PAMU_PAGE_SHIFT, + dma_domain->snoop_id, dma_domain->stash_id, + 0, wnd->prot); + spin_unlock_irqrestore(&iommu_lock, flags); + if (ret) + pr_debug("PAACE configuration failed for liodn %d\n", liodn); + + return ret; +} + +/* Map the DMA window corresponding to the LIODN */ +static int map_liodn(int liodn, struct fsl_dma_domain *dma_domain) +{ + if (dma_domain->win_cnt > 1) + return map_subwins(liodn, dma_domain); + else + return map_win(liodn, dma_domain); +} + +/* Update window/subwindow mapping for the LIODN */ +static int update_liodn(int liodn, struct fsl_dma_domain *dma_domain, u32 wnd_nr) +{ + int ret; + struct dma_window *wnd = &dma_domain->win_arr[wnd_nr]; + unsigned long flags; + + spin_lock_irqsave(&iommu_lock, flags); + if (dma_domain->win_cnt > 1) { + ret = pamu_config_spaace(liodn, dma_domain->win_cnt, wnd_nr, + wnd->size, + ~(u32)0, + wnd->paddr >> PAMU_PAGE_SHIFT, + dma_domain->snoop_id, + dma_domain->stash_id, + (wnd_nr > 0) ? 1 : 0, + wnd->prot); + if (ret) + pr_debug("Subwindow reconfiguration failed for liodn %d\n", + liodn); + } else { + phys_addr_t wnd_addr; + + wnd_addr = dma_domain->iommu_domain.geometry.aperture_start; + + ret = pamu_config_ppaace(liodn, wnd_addr, + wnd->size, + ~(u32)0, + wnd->paddr >> PAMU_PAGE_SHIFT, + dma_domain->snoop_id, dma_domain->stash_id, + 0, wnd->prot); + if (ret) + pr_debug("Window reconfiguration failed for liodn %d\n", + liodn); + } + + spin_unlock_irqrestore(&iommu_lock, flags); + + return ret; +} + +static int update_liodn_stash(int liodn, struct fsl_dma_domain *dma_domain, + u32 val) +{ + int ret = 0, i; + unsigned long flags; + + spin_lock_irqsave(&iommu_lock, flags); + if (!dma_domain->win_arr) { + pr_debug("Windows not configured, stash destination update failed for liodn %d\n", + liodn); + spin_unlock_irqrestore(&iommu_lock, flags); + return -EINVAL; + } + + for (i = 0; i < dma_domain->win_cnt; i++) { + ret = pamu_update_paace_stash(liodn, i, val); + if (ret) { + pr_debug("Failed to update SPAACE %d field for liodn %d\n ", + i, liodn); + spin_unlock_irqrestore(&iommu_lock, flags); + return ret; + } + } + + spin_unlock_irqrestore(&iommu_lock, flags); + + return ret; +} + +/* Set the geometry parameters for a LIODN */ +static int pamu_set_liodn(int liodn, struct device *dev, + struct fsl_dma_domain *dma_domain, + struct iommu_domain_geometry *geom_attr, + u32 win_cnt) +{ + phys_addr_t window_addr, window_size; + phys_addr_t subwin_size; + int ret = 0, i; + u32 omi_index = ~(u32)0; + unsigned long flags; + + /* + * Configure the omi_index at the geometry setup time. + * This is a static value which depends on the type of + * device and would not change thereafter. + */ + get_ome_index(&omi_index, dev); + + window_addr = geom_attr->aperture_start; + window_size = dma_domain->geom_size; + + spin_lock_irqsave(&iommu_lock, flags); + ret = pamu_disable_liodn(liodn); + if (!ret) + ret = pamu_config_ppaace(liodn, window_addr, window_size, omi_index, + 0, dma_domain->snoop_id, + dma_domain->stash_id, win_cnt, 0); + spin_unlock_irqrestore(&iommu_lock, flags); + if (ret) { + pr_debug("PAACE configuration failed for liodn %d, win_cnt =%d\n", + liodn, win_cnt); + return ret; + } + + if (win_cnt > 1) { + subwin_size = window_size >> ilog2(win_cnt); + for (i = 0; i < win_cnt; i++) { + spin_lock_irqsave(&iommu_lock, flags); + ret = pamu_disable_spaace(liodn, i); + if (!ret) + ret = pamu_config_spaace(liodn, win_cnt, i, + subwin_size, omi_index, + 0, dma_domain->snoop_id, + dma_domain->stash_id, + 0, 0); + spin_unlock_irqrestore(&iommu_lock, flags); + if (ret) { + pr_debug("SPAACE configuration failed for liodn %d\n", + liodn); + return ret; + } + } + } + + return ret; +} + +static int check_size(u64 size, dma_addr_t iova) +{ + /* + * Size must be a power of two and at least be equal + * to PAMU page size. + */ + if ((size & (size - 1)) || size < PAMU_PAGE_SIZE) { + pr_debug("Size too small or not a power of two\n"); + return -EINVAL; + } + + /* iova must be page size aligned */ + if (iova & (size - 1)) { + pr_debug("Address is not aligned with window size\n"); + return -EINVAL; + } + + return 0; +} + +static struct fsl_dma_domain *iommu_alloc_dma_domain(void) +{ + struct fsl_dma_domain *domain; + + domain = kmem_cache_zalloc(fsl_pamu_domain_cache, GFP_KERNEL); + if (!domain) + return NULL; + + domain->stash_id = ~(u32)0; + domain->snoop_id = ~(u32)0; + domain->win_cnt = pamu_get_max_subwin_cnt(); + domain->geom_size = 0; + + INIT_LIST_HEAD(&domain->devices); + + spin_lock_init(&domain->domain_lock); + + return domain; +} + +static void remove_device_ref(struct device_domain_info *info, u32 win_cnt) +{ + unsigned long flags; + + list_del(&info->link); + spin_lock_irqsave(&iommu_lock, flags); + if (win_cnt > 1) + pamu_free_subwins(info->liodn); + pamu_disable_liodn(info->liodn); + spin_unlock_irqrestore(&iommu_lock, flags); + spin_lock_irqsave(&device_domain_lock, flags); + info->dev->archdata.iommu_domain = NULL; + kmem_cache_free(iommu_devinfo_cache, info); + spin_unlock_irqrestore(&device_domain_lock, flags); +} + +static void detach_device(struct device *dev, struct fsl_dma_domain *dma_domain) +{ + struct device_domain_info *info, *tmp; + unsigned long flags; + + spin_lock_irqsave(&dma_domain->domain_lock, flags); + /* Remove the device from the domain device list */ + list_for_each_entry_safe(info, tmp, &dma_domain->devices, link) { + if (!dev || (info->dev == dev)) + remove_device_ref(info, dma_domain->win_cnt); + } + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); +} + +static void attach_device(struct fsl_dma_domain *dma_domain, int liodn, struct device *dev) +{ + struct device_domain_info *info, *old_domain_info; + unsigned long flags; + + spin_lock_irqsave(&device_domain_lock, flags); + /* + * Check here if the device is already attached to domain or not. + * If the device is already attached to a domain detach it. + */ + old_domain_info = dev->archdata.iommu_domain; + if (old_domain_info && old_domain_info->domain != dma_domain) { + spin_unlock_irqrestore(&device_domain_lock, flags); + detach_device(dev, old_domain_info->domain); + spin_lock_irqsave(&device_domain_lock, flags); + } + + info = kmem_cache_zalloc(iommu_devinfo_cache, GFP_ATOMIC); + + info->dev = dev; + info->liodn = liodn; + info->domain = dma_domain; + + list_add(&info->link, &dma_domain->devices); + /* + * In case of devices with multiple LIODNs just store + * the info for the first LIODN as all + * LIODNs share the same domain + */ + if (!dev->archdata.iommu_domain) + dev->archdata.iommu_domain = info; + spin_unlock_irqrestore(&device_domain_lock, flags); +} + +static phys_addr_t fsl_pamu_iova_to_phys(struct iommu_domain *domain, + dma_addr_t iova) +{ + struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain); + + if (iova < domain->geometry.aperture_start || + iova > domain->geometry.aperture_end) + return 0; + + return get_phys_addr(dma_domain, iova); +} + +static bool fsl_pamu_capable(enum iommu_cap cap) +{ + return cap == IOMMU_CAP_CACHE_COHERENCY; +} + +static void fsl_pamu_domain_free(struct iommu_domain *domain) +{ + struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain); + + /* remove all the devices from the device list */ + detach_device(NULL, dma_domain); + + dma_domain->enabled = 0; + dma_domain->mapped = 0; + + kmem_cache_free(fsl_pamu_domain_cache, dma_domain); +} + +static struct iommu_domain *fsl_pamu_domain_alloc(unsigned type) +{ + struct fsl_dma_domain *dma_domain; + + if (type != IOMMU_DOMAIN_UNMANAGED) + return NULL; + + dma_domain = iommu_alloc_dma_domain(); + if (!dma_domain) { + pr_debug("dma_domain allocation failed\n"); + return NULL; + } + /* defaul geometry 64 GB i.e. maximum system address */ + dma_domain->iommu_domain. geometry.aperture_start = 0; + dma_domain->iommu_domain.geometry.aperture_end = (1ULL << 36) - 1; + dma_domain->iommu_domain.geometry.force_aperture = true; + + return &dma_domain->iommu_domain; +} + +/* Configure geometry settings for all LIODNs associated with domain */ +static int pamu_set_domain_geometry(struct fsl_dma_domain *dma_domain, + struct iommu_domain_geometry *geom_attr, + u32 win_cnt) +{ + struct device_domain_info *info; + int ret = 0; + + list_for_each_entry(info, &dma_domain->devices, link) { + ret = pamu_set_liodn(info->liodn, info->dev, dma_domain, + geom_attr, win_cnt); + if (ret) + break; + } + + return ret; +} + +/* Update stash destination for all LIODNs associated with the domain */ +static int update_domain_stash(struct fsl_dma_domain *dma_domain, u32 val) +{ + struct device_domain_info *info; + int ret = 0; + + list_for_each_entry(info, &dma_domain->devices, link) { + ret = update_liodn_stash(info->liodn, dma_domain, val); + if (ret) + break; + } + + return ret; +} + +/* Update domain mappings for all LIODNs associated with the domain */ +static int update_domain_mapping(struct fsl_dma_domain *dma_domain, u32 wnd_nr) +{ + struct device_domain_info *info; + int ret = 0; + + list_for_each_entry(info, &dma_domain->devices, link) { + ret = update_liodn(info->liodn, dma_domain, wnd_nr); + if (ret) + break; + } + return ret; +} + +static int disable_domain_win(struct fsl_dma_domain *dma_domain, u32 wnd_nr) +{ + struct device_domain_info *info; + int ret = 0; + + list_for_each_entry(info, &dma_domain->devices, link) { + if (dma_domain->win_cnt == 1 && dma_domain->enabled) { + ret = pamu_disable_liodn(info->liodn); + if (!ret) + dma_domain->enabled = 0; + } else { + ret = pamu_disable_spaace(info->liodn, wnd_nr); + } + } + + return ret; +} + +static void fsl_pamu_window_disable(struct iommu_domain *domain, u32 wnd_nr) +{ + struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain); + unsigned long flags; + int ret; + + spin_lock_irqsave(&dma_domain->domain_lock, flags); + if (!dma_domain->win_arr) { + pr_debug("Number of windows not configured\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return; + } + + if (wnd_nr >= dma_domain->win_cnt) { + pr_debug("Invalid window index\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return; + } + + if (dma_domain->win_arr[wnd_nr].valid) { + ret = disable_domain_win(dma_domain, wnd_nr); + if (!ret) { + dma_domain->win_arr[wnd_nr].valid = 0; + dma_domain->mapped--; + } + } + + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); +} + +static int fsl_pamu_window_enable(struct iommu_domain *domain, u32 wnd_nr, + phys_addr_t paddr, u64 size, int prot) +{ + struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain); + struct dma_window *wnd; + int pamu_prot = 0; + int ret; + unsigned long flags; + u64 win_size; + + if (prot & IOMMU_READ) + pamu_prot |= PAACE_AP_PERMS_QUERY; + if (prot & IOMMU_WRITE) + pamu_prot |= PAACE_AP_PERMS_UPDATE; + + spin_lock_irqsave(&dma_domain->domain_lock, flags); + if (!dma_domain->win_arr) { + pr_debug("Number of windows not configured\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -ENODEV; + } + + if (wnd_nr >= dma_domain->win_cnt) { + pr_debug("Invalid window index\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -EINVAL; + } + + win_size = dma_domain->geom_size >> ilog2(dma_domain->win_cnt); + if (size > win_size) { + pr_debug("Invalid window size\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -EINVAL; + } + + if (dma_domain->win_cnt == 1) { + if (dma_domain->enabled) { + pr_debug("Disable the window before updating the mapping\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -EBUSY; + } + + ret = check_size(size, domain->geometry.aperture_start); + if (ret) { + pr_debug("Aperture start not aligned to the size\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -EINVAL; + } + } + + wnd = &dma_domain->win_arr[wnd_nr]; + if (!wnd->valid) { + wnd->paddr = paddr; + wnd->size = size; + wnd->prot = pamu_prot; + + ret = update_domain_mapping(dma_domain, wnd_nr); + if (!ret) { + wnd->valid = 1; + dma_domain->mapped++; + } + } else { + pr_debug("Disable the window before updating the mapping\n"); + ret = -EBUSY; + } + + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + + return ret; +} + +/* + * Attach the LIODN to the DMA domain and configure the geometry + * and window mappings. + */ +static int handle_attach_device(struct fsl_dma_domain *dma_domain, + struct device *dev, const u32 *liodn, + int num) +{ + unsigned long flags; + struct iommu_domain *domain = &dma_domain->iommu_domain; + int ret = 0; + int i; + + spin_lock_irqsave(&dma_domain->domain_lock, flags); + for (i = 0; i < num; i++) { + /* Ensure that LIODN value is valid */ + if (liodn[i] >= PAACE_NUMBER_ENTRIES) { + pr_debug("Invalid liodn %d, attach device failed for %s\n", + liodn[i], dev->of_node->full_name); + ret = -EINVAL; + break; + } + + attach_device(dma_domain, liodn[i], dev); + /* + * Check if geometry has already been configured + * for the domain. If yes, set the geometry for + * the LIODN. + */ + if (dma_domain->win_arr) { + u32 win_cnt = dma_domain->win_cnt > 1 ? dma_domain->win_cnt : 0; + + ret = pamu_set_liodn(liodn[i], dev, dma_domain, + &domain->geometry, win_cnt); + if (ret) + break; + if (dma_domain->mapped) { + /* + * Create window/subwindow mapping for + * the LIODN. + */ + ret = map_liodn(liodn[i], dma_domain); + if (ret) + break; + } + } + } + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + + return ret; +} + +static int fsl_pamu_attach_device(struct iommu_domain *domain, + struct device *dev) +{ + struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain); + const u32 *liodn; + u32 liodn_cnt; + int len, ret = 0; + struct pci_dev *pdev = NULL; + struct pci_controller *pci_ctl; + + /* + * Use LIODN of the PCI controller while attaching a + * PCI device. + */ + if (dev_is_pci(dev)) { + pdev = to_pci_dev(dev); + pci_ctl = pci_bus_to_host(pdev->bus); + /* + * make dev point to pci controller device + * so we can get the LIODN programmed by + * u-boot. + */ + dev = pci_ctl->parent; + } + + liodn = of_get_property(dev->of_node, "fsl,liodn", &len); + if (liodn) { + liodn_cnt = len / sizeof(u32); + ret = handle_attach_device(dma_domain, dev, liodn, liodn_cnt); + } else { + pr_debug("missing fsl,liodn property at %s\n", + dev->of_node->full_name); + ret = -EINVAL; + } + + return ret; +} + +static void fsl_pamu_detach_device(struct iommu_domain *domain, + struct device *dev) +{ + struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain); + const u32 *prop; + int len; + struct pci_dev *pdev = NULL; + struct pci_controller *pci_ctl; + + /* + * Use LIODN of the PCI controller while detaching a + * PCI device. + */ + if (dev_is_pci(dev)) { + pdev = to_pci_dev(dev); + pci_ctl = pci_bus_to_host(pdev->bus); + /* + * make dev point to pci controller device + * so we can get the LIODN programmed by + * u-boot. + */ + dev = pci_ctl->parent; + } + + prop = of_get_property(dev->of_node, "fsl,liodn", &len); + if (prop) + detach_device(dev, dma_domain); + else + pr_debug("missing fsl,liodn property at %s\n", + dev->of_node->full_name); +} + +static int configure_domain_geometry(struct iommu_domain *domain, void *data) +{ + struct iommu_domain_geometry *geom_attr = data; + struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain); + dma_addr_t geom_size; + unsigned long flags; + + geom_size = geom_attr->aperture_end - geom_attr->aperture_start + 1; + /* + * Sanity check the geometry size. Also, we do not support + * DMA outside of the geometry. + */ + if (check_size(geom_size, geom_attr->aperture_start) || + !geom_attr->force_aperture) { + pr_debug("Invalid PAMU geometry attributes\n"); + return -EINVAL; + } + + spin_lock_irqsave(&dma_domain->domain_lock, flags); + if (dma_domain->enabled) { + pr_debug("Can't set geometry attributes as domain is active\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -EBUSY; + } + + /* Copy the domain geometry information */ + memcpy(&domain->geometry, geom_attr, + sizeof(struct iommu_domain_geometry)); + dma_domain->geom_size = geom_size; + + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + + return 0; +} + +/* Set the domain stash attribute */ +static int configure_domain_stash(struct fsl_dma_domain *dma_domain, void *data) +{ + struct pamu_stash_attribute *stash_attr = data; + unsigned long flags; + int ret; + + spin_lock_irqsave(&dma_domain->domain_lock, flags); + + memcpy(&dma_domain->dma_stash, stash_attr, + sizeof(struct pamu_stash_attribute)); + + dma_domain->stash_id = get_stash_id(stash_attr->cache, + stash_attr->cpu); + if (dma_domain->stash_id == ~(u32)0) { + pr_debug("Invalid stash attributes\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -EINVAL; + } + + ret = update_domain_stash(dma_domain, dma_domain->stash_id); + + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + + return ret; +} + +/* Configure domain dma state i.e. enable/disable DMA */ +static int configure_domain_dma_state(struct fsl_dma_domain *dma_domain, bool enable) +{ + struct device_domain_info *info; + unsigned long flags; + int ret; + + spin_lock_irqsave(&dma_domain->domain_lock, flags); + + if (enable && !dma_domain->mapped) { + pr_debug("Can't enable DMA domain without valid mapping\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -ENODEV; + } + + dma_domain->enabled = enable; + list_for_each_entry(info, &dma_domain->devices, link) { + ret = (enable) ? pamu_enable_liodn(info->liodn) : + pamu_disable_liodn(info->liodn); + if (ret) + pr_debug("Unable to set dma state for liodn %d", + info->liodn); + } + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + + return 0; +} + +static int fsl_pamu_set_domain_attr(struct iommu_domain *domain, + enum iommu_attr attr_type, void *data) +{ + struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain); + int ret = 0; + + switch (attr_type) { + case DOMAIN_ATTR_GEOMETRY: + ret = configure_domain_geometry(domain, data); + break; + case DOMAIN_ATTR_FSL_PAMU_STASH: + ret = configure_domain_stash(dma_domain, data); + break; + case DOMAIN_ATTR_FSL_PAMU_ENABLE: + ret = configure_domain_dma_state(dma_domain, *(int *)data); + break; + default: + pr_debug("Unsupported attribute type\n"); + ret = -EINVAL; + break; + } + + return ret; +} + +static int fsl_pamu_get_domain_attr(struct iommu_domain *domain, + enum iommu_attr attr_type, void *data) +{ + struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain); + int ret = 0; + + switch (attr_type) { + case DOMAIN_ATTR_FSL_PAMU_STASH: + memcpy(data, &dma_domain->dma_stash, + sizeof(struct pamu_stash_attribute)); + break; + case DOMAIN_ATTR_FSL_PAMU_ENABLE: + *(int *)data = dma_domain->enabled; + break; + case DOMAIN_ATTR_FSL_PAMUV1: + *(int *)data = DOMAIN_ATTR_FSL_PAMUV1; + break; + default: + pr_debug("Unsupported attribute type\n"); + ret = -EINVAL; + break; + } + + return ret; +} + +static struct iommu_group *get_device_iommu_group(struct device *dev) +{ + struct iommu_group *group; + + group = iommu_group_get(dev); + if (!group) + group = iommu_group_alloc(); + + return group; +} + +static bool check_pci_ctl_endpt_part(struct pci_controller *pci_ctl) +{ + u32 version; + + /* Check the PCI controller version number by readding BRR1 register */ + version = in_be32(pci_ctl->cfg_addr + (PCI_FSL_BRR1 >> 2)); + version &= PCI_FSL_BRR1_VER; + /* If PCI controller version is >= 0x204 we can partition endpoints */ + return version >= 0x204; +} + +/* Get iommu group information from peer devices or devices on the parent bus */ +static struct iommu_group *get_shared_pci_device_group(struct pci_dev *pdev) +{ + struct pci_dev *tmp; + struct iommu_group *group; + struct pci_bus *bus = pdev->bus; + + /* + * Traverese the pci bus device list to get + * the shared iommu group. + */ + while (bus) { + list_for_each_entry(tmp, &bus->devices, bus_list) { + if (tmp == pdev) + continue; + group = iommu_group_get(&tmp->dev); + if (group) + return group; + } + + bus = bus->parent; + } + + return NULL; +} + +static struct iommu_group *get_pci_device_group(struct pci_dev *pdev) +{ + struct pci_controller *pci_ctl; + bool pci_endpt_partioning; + struct iommu_group *group = NULL; + + pci_ctl = pci_bus_to_host(pdev->bus); + pci_endpt_partioning = check_pci_ctl_endpt_part(pci_ctl); + /* We can partition PCIe devices so assign device group to the device */ + if (pci_endpt_partioning) { + group = iommu_group_get_for_dev(&pdev->dev); + + /* + * PCIe controller is not a paritionable entity + * free the controller device iommu_group. + */ + if (pci_ctl->parent->iommu_group) + iommu_group_remove_device(pci_ctl->parent); + } else { + /* + * All devices connected to the controller will share the + * PCI controllers device group. If this is the first + * device to be probed for the pci controller, copy the + * device group information from the PCI controller device + * node and remove the PCI controller iommu group. + * For subsequent devices, the iommu group information can + * be obtained from sibling devices (i.e. from the bus_devices + * link list). + */ + if (pci_ctl->parent->iommu_group) { + group = get_device_iommu_group(pci_ctl->parent); + iommu_group_remove_device(pci_ctl->parent); + } else { + group = get_shared_pci_device_group(pdev); + } + } + + if (!group) + group = ERR_PTR(-ENODEV); + + return group; +} + +static int fsl_pamu_add_device(struct device *dev) +{ + struct iommu_group *group = ERR_PTR(-ENODEV); + struct pci_dev *pdev; + const u32 *prop; + int ret = 0, len; + + /* + * For platform devices we allocate a separate group for + * each of the devices. + */ + if (dev_is_pci(dev)) { + pdev = to_pci_dev(dev); + /* Don't create device groups for virtual PCI bridges */ + if (pdev->subordinate) + return 0; + + group = get_pci_device_group(pdev); + + } else { + prop = of_get_property(dev->of_node, "fsl,liodn", &len); + if (prop) + group = get_device_iommu_group(dev); + } + + if (IS_ERR(group)) + return PTR_ERR(group); + + /* + * Check if device has already been added to an iommu group. + * Group could have already been created for a PCI device in + * the iommu_group_get_for_dev path. + */ + if (!dev->iommu_group) + ret = iommu_group_add_device(group, dev); + + iommu_group_put(group); + return ret; +} + +static void fsl_pamu_remove_device(struct device *dev) +{ + iommu_group_remove_device(dev); +} + +static int fsl_pamu_set_windows(struct iommu_domain *domain, u32 w_count) +{ + struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain); + unsigned long flags; + int ret; + + spin_lock_irqsave(&dma_domain->domain_lock, flags); + /* Ensure domain is inactive i.e. DMA should be disabled for the domain */ + if (dma_domain->enabled) { + pr_debug("Can't set geometry attributes as domain is active\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -EBUSY; + } + + /* Ensure that the geometry has been set for the domain */ + if (!dma_domain->geom_size) { + pr_debug("Please configure geometry before setting the number of windows\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -EINVAL; + } + + /* + * Ensure we have valid window count i.e. it should be less than + * maximum permissible limit and should be a power of two. + */ + if (w_count > pamu_get_max_subwin_cnt() || !is_power_of_2(w_count)) { + pr_debug("Invalid window count\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -EINVAL; + } + + ret = pamu_set_domain_geometry(dma_domain, &domain->geometry, + w_count > 1 ? w_count : 0); + if (!ret) { + kfree(dma_domain->win_arr); + dma_domain->win_arr = kcalloc(w_count, + sizeof(*dma_domain->win_arr), + GFP_ATOMIC); + if (!dma_domain->win_arr) { + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -ENOMEM; + } + dma_domain->win_cnt = w_count; + } + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + + return ret; +} + +static u32 fsl_pamu_get_windows(struct iommu_domain *domain) +{ + struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain); + + return dma_domain->win_cnt; +} + +static const struct iommu_ops fsl_pamu_ops = { + .capable = fsl_pamu_capable, + .domain_alloc = fsl_pamu_domain_alloc, + .domain_free = fsl_pamu_domain_free, + .attach_dev = fsl_pamu_attach_device, + .detach_dev = fsl_pamu_detach_device, + .domain_window_enable = fsl_pamu_window_enable, + .domain_window_disable = fsl_pamu_window_disable, + .domain_get_windows = fsl_pamu_get_windows, + .domain_set_windows = fsl_pamu_set_windows, + .iova_to_phys = fsl_pamu_iova_to_phys, + .domain_set_attr = fsl_pamu_set_domain_attr, + .domain_get_attr = fsl_pamu_get_domain_attr, + .add_device = fsl_pamu_add_device, + .remove_device = fsl_pamu_remove_device, +}; + +int __init pamu_domain_init(void) +{ + int ret = 0; + + ret = iommu_init_mempool(); + if (ret) + return ret; + + bus_set_iommu(&platform_bus_type, &fsl_pamu_ops); + bus_set_iommu(&pci_bus_type, &fsl_pamu_ops); + + return ret; +} diff --git a/drivers/iommu/fsl_pamu_domain.h b/drivers/iommu/fsl_pamu_domain.h new file mode 100644 index 000000000..f2b0f741d --- /dev/null +++ b/drivers/iommu/fsl_pamu_domain.h @@ -0,0 +1,85 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright (C) 2013 Freescale Semiconductor, Inc. + * + */ + +#ifndef __FSL_PAMU_DOMAIN_H +#define __FSL_PAMU_DOMAIN_H + +#include "fsl_pamu.h" + +struct dma_window { + phys_addr_t paddr; + u64 size; + int valid; + int prot; +}; + +struct fsl_dma_domain { + /* + * Indicates the geometry size for the domain. + * This would be set when the geometry is + * configured for the domain. + */ + dma_addr_t geom_size; + /* + * Number of windows assocaited with this domain. + * During domain initialization, it is set to the + * the maximum number of subwindows allowed for a LIODN. + * Minimum value for this is 1 indicating a single PAMU + * window, without any sub windows. Value can be set/ + * queried by set_attr/get_attr API for DOMAIN_ATTR_WINDOWS. + * Value can only be set once the geometry has been configured. + */ + u32 win_cnt; + /* + * win_arr contains information of the configured + * windows for a domain. This is allocated only + * when the number of windows for the domain are + * set. + */ + struct dma_window *win_arr; + /* list of devices associated with the domain */ + struct list_head devices; + /* dma_domain states: + * mapped - A particular mapping has been created + * within the configured geometry. + * enabled - DMA has been enabled for the given + * domain. This translates to setting of the + * valid bit for the primary PAACE in the PAMU + * PAACT table. Domain geometry should be set and + * it must have a valid mapping before DMA can be + * enabled for it. + * + */ + int mapped; + int enabled; + /* stash_id obtained from the stash attribute details */ + u32 stash_id; + struct pamu_stash_attribute dma_stash; + u32 snoop_id; + struct iommu_domain iommu_domain; + spinlock_t domain_lock; +}; + +/* domain-device relationship */ +struct device_domain_info { + struct list_head link; /* link to domain siblings */ + struct device *dev; + u32 liodn; + struct fsl_dma_domain *domain; /* pointer to domain */ +}; +#endif /* __FSL_PAMU_DOMAIN_H */ diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c new file mode 100644 index 000000000..5ecfaf299 --- /dev/null +++ b/drivers/iommu/intel-iommu.c @@ -0,0 +1,4744 @@ +/* + * Copyright © 2006-2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * Authors: David Woodhouse <dwmw2@infradead.org>, + * Ashok Raj <ashok.raj@intel.com>, + * Shaohua Li <shaohua.li@intel.com>, + * Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>, + * Fenghua Yu <fenghua.yu@intel.com> + */ + +#include <linux/init.h> +#include <linux/bitmap.h> +#include <linux/debugfs.h> +#include <linux/export.h> +#include <linux/slab.h> +#include <linux/irq.h> +#include <linux/interrupt.h> +#include <linux/spinlock.h> +#include <linux/pci.h> +#include <linux/dmar.h> +#include <linux/dma-mapping.h> +#include <linux/mempool.h> +#include <linux/memory.h> +#include <linux/timer.h> +#include <linux/iova.h> +#include <linux/iommu.h> +#include <linux/intel-iommu.h> +#include <linux/syscore_ops.h> +#include <linux/tboot.h> +#include <linux/dmi.h> +#include <linux/pci-ats.h> +#include <linux/memblock.h> +#include <linux/dma-contiguous.h> +#include <asm/irq_remapping.h> +#include <asm/cacheflush.h> +#include <asm/iommu.h> + +#include "irq_remapping.h" + +#define ROOT_SIZE VTD_PAGE_SIZE +#define CONTEXT_SIZE VTD_PAGE_SIZE + +#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY) +#define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB) +#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) +#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e) + +#define IOAPIC_RANGE_START (0xfee00000) +#define IOAPIC_RANGE_END (0xfeefffff) +#define IOVA_START_ADDR (0x1000) + +#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48 + +#define MAX_AGAW_WIDTH 64 +#define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT) + +#define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1) +#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1) + +/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR + to match. That way, we can use 'unsigned long' for PFNs with impunity. */ +#define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \ + __DOMAIN_MAX_PFN(gaw), (unsigned long)-1)) +#define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT) + +/* IO virtual address start page frame number */ +#define IOVA_START_PFN (1) + +#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) +#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32)) +#define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64)) + +/* page table handling */ +#define LEVEL_STRIDE (9) +#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1) + +/* + * This bitmap is used to advertise the page sizes our hardware support + * to the IOMMU core, which will then use this information to split + * physically contiguous memory regions it is mapping into page sizes + * that we support. + * + * Traditionally the IOMMU core just handed us the mappings directly, + * after making sure the size is an order of a 4KiB page and that the + * mapping has natural alignment. + * + * To retain this behavior, we currently advertise that we support + * all page sizes that are an order of 4KiB. + * + * If at some point we'd like to utilize the IOMMU core's new behavior, + * we could change this to advertise the real page sizes we support. + */ +#define INTEL_IOMMU_PGSIZES (~0xFFFUL) + +static inline int agaw_to_level(int agaw) +{ + return agaw + 2; +} + +static inline int agaw_to_width(int agaw) +{ + return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH); +} + +static inline int width_to_agaw(int width) +{ + return DIV_ROUND_UP(width - 30, LEVEL_STRIDE); +} + +static inline unsigned int level_to_offset_bits(int level) +{ + return (level - 1) * LEVEL_STRIDE; +} + +static inline int pfn_level_offset(unsigned long pfn, int level) +{ + return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK; +} + +static inline unsigned long level_mask(int level) +{ + return -1UL << level_to_offset_bits(level); +} + +static inline unsigned long level_size(int level) +{ + return 1UL << level_to_offset_bits(level); +} + +static inline unsigned long align_to_level(unsigned long pfn, int level) +{ + return (pfn + level_size(level) - 1) & level_mask(level); +} + +static inline unsigned long lvl_to_nr_pages(unsigned int lvl) +{ + return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH); +} + +/* VT-d pages must always be _smaller_ than MM pages. Otherwise things + are never going to work. */ +static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn) +{ + return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT); +} + +static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn) +{ + return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT); +} +static inline unsigned long page_to_dma_pfn(struct page *pg) +{ + return mm_to_dma_pfn(page_to_pfn(pg)); +} +static inline unsigned long virt_to_dma_pfn(void *p) +{ + return page_to_dma_pfn(virt_to_page(p)); +} + +/* global iommu list, set NULL for ignored DMAR units */ +static struct intel_iommu **g_iommus; + +static void __init check_tylersburg_isoch(void); +static int rwbf_quirk; + +/* + * set to 1 to panic kernel if can't successfully enable VT-d + * (used when kernel is launched w/ TXT) + */ +static int force_on = 0; + +/* + * 0: Present + * 1-11: Reserved + * 12-63: Context Ptr (12 - (haw-1)) + * 64-127: Reserved + */ +struct root_entry { + u64 lo; + u64 hi; +}; +#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry)) + + +/* + * low 64 bits: + * 0: present + * 1: fault processing disable + * 2-3: translation type + * 12-63: address space root + * high 64 bits: + * 0-2: address width + * 3-6: aval + * 8-23: domain id + */ +struct context_entry { + u64 lo; + u64 hi; +}; + +static inline bool context_present(struct context_entry *context) +{ + return (context->lo & 1); +} +static inline void context_set_present(struct context_entry *context) +{ + context->lo |= 1; +} + +static inline void context_set_fault_enable(struct context_entry *context) +{ + context->lo &= (((u64)-1) << 2) | 1; +} + +static inline void context_set_translation_type(struct context_entry *context, + unsigned long value) +{ + context->lo &= (((u64)-1) << 4) | 3; + context->lo |= (value & 3) << 2; +} + +static inline void context_set_address_root(struct context_entry *context, + unsigned long value) +{ + context->lo &= ~VTD_PAGE_MASK; + context->lo |= value & VTD_PAGE_MASK; +} + +static inline void context_set_address_width(struct context_entry *context, + unsigned long value) +{ + context->hi |= value & 7; +} + +static inline void context_set_domain_id(struct context_entry *context, + unsigned long value) +{ + context->hi |= (value & ((1 << 16) - 1)) << 8; +} + +static inline void context_clear_entry(struct context_entry *context) +{ + context->lo = 0; + context->hi = 0; +} + +/* + * 0: readable + * 1: writable + * 2-6: reserved + * 7: super page + * 8-10: available + * 11: snoop behavior + * 12-63: Host physcial address + */ +struct dma_pte { + u64 val; +}; + +static inline void dma_clear_pte(struct dma_pte *pte) +{ + pte->val = 0; +} + +static inline u64 dma_pte_addr(struct dma_pte *pte) +{ +#ifdef CONFIG_64BIT + return pte->val & VTD_PAGE_MASK; +#else + /* Must have a full atomic 64-bit read */ + return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK; +#endif +} + +static inline bool dma_pte_present(struct dma_pte *pte) +{ + return (pte->val & 3) != 0; +} + +static inline bool dma_pte_superpage(struct dma_pte *pte) +{ + return (pte->val & DMA_PTE_LARGE_PAGE); +} + +static inline int first_pte_in_page(struct dma_pte *pte) +{ + return !((unsigned long)pte & ~VTD_PAGE_MASK); +} + +/* + * This domain is a statically identity mapping domain. + * 1. This domain creats a static 1:1 mapping to all usable memory. + * 2. It maps to each iommu if successful. + * 3. Each iommu mapps to this domain if successful. + */ +static struct dmar_domain *si_domain; +static int hw_pass_through = 1; + +/* domain represents a virtual machine, more than one devices + * across iommus may be owned in one domain, e.g. kvm guest. + */ +#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 0) + +/* si_domain contains mulitple devices */ +#define DOMAIN_FLAG_STATIC_IDENTITY (1 << 1) + +struct dmar_domain { + int id; /* domain id */ + int nid; /* node id */ + DECLARE_BITMAP(iommu_bmp, DMAR_UNITS_SUPPORTED); + /* bitmap of iommus this domain uses*/ + + struct list_head devices; /* all devices' list */ + struct iova_domain iovad; /* iova's that belong to this domain */ + + struct dma_pte *pgd; /* virtual address */ + int gaw; /* max guest address width */ + + /* adjusted guest address width, 0 is level 2 30-bit */ + int agaw; + + int flags; /* flags to find out type of domain */ + + int iommu_coherency;/* indicate coherency of iommu access */ + int iommu_snooping; /* indicate snooping control feature*/ + int iommu_count; /* reference count of iommu */ + int iommu_superpage;/* Level of superpages supported: + 0 == 4KiB (no superpages), 1 == 2MiB, + 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */ + spinlock_t iommu_lock; /* protect iommu set in domain */ + u64 max_addr; /* maximum mapped address */ + + struct iommu_domain domain; /* generic domain data structure for + iommu core */ +}; + +/* PCI domain-device relationship */ +struct device_domain_info { + struct list_head link; /* link to domain siblings */ + struct list_head global; /* link to global list */ + u8 bus; /* PCI bus number */ + u8 devfn; /* PCI devfn number */ + struct device *dev; /* it's NULL for PCIe-to-PCI bridge */ + struct intel_iommu *iommu; /* IOMMU used by this device */ + struct dmar_domain *domain; /* pointer to domain */ +}; + +struct dmar_rmrr_unit { + struct list_head list; /* list of rmrr units */ + struct acpi_dmar_header *hdr; /* ACPI header */ + u64 base_address; /* reserved base address*/ + u64 end_address; /* reserved end address */ + struct dmar_dev_scope *devices; /* target devices */ + int devices_cnt; /* target device count */ +}; + +struct dmar_atsr_unit { + struct list_head list; /* list of ATSR units */ + struct acpi_dmar_header *hdr; /* ACPI header */ + struct dmar_dev_scope *devices; /* target devices */ + int devices_cnt; /* target device count */ + u8 include_all:1; /* include all ports */ +}; + +static LIST_HEAD(dmar_atsr_units); +static LIST_HEAD(dmar_rmrr_units); + +#define for_each_rmrr_units(rmrr) \ + list_for_each_entry(rmrr, &dmar_rmrr_units, list) + +static void flush_unmaps_timeout(unsigned long data); + +static DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0); + +#define HIGH_WATER_MARK 250 +struct deferred_flush_tables { + int next; + struct iova *iova[HIGH_WATER_MARK]; + struct dmar_domain *domain[HIGH_WATER_MARK]; + struct page *freelist[HIGH_WATER_MARK]; +}; + +static struct deferred_flush_tables *deferred_flush; + +/* bitmap for indexing intel_iommus */ +static int g_num_of_iommus; + +static DEFINE_SPINLOCK(async_umap_flush_lock); +static LIST_HEAD(unmaps_to_do); + +static int timer_on; +static long list_size; + +static void domain_exit(struct dmar_domain *domain); +static void domain_remove_dev_info(struct dmar_domain *domain); +static void domain_remove_one_dev_info(struct dmar_domain *domain, + struct device *dev); +static void iommu_detach_dependent_devices(struct intel_iommu *iommu, + struct device *dev); +static int domain_detach_iommu(struct dmar_domain *domain, + struct intel_iommu *iommu); + +#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON +int dmar_disabled = 0; +#else +int dmar_disabled = 1; +#endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/ + +int intel_iommu_enabled = 0; +EXPORT_SYMBOL_GPL(intel_iommu_enabled); + +static int dmar_map_gfx = 1; +static int dmar_forcedac; +static int intel_iommu_strict; +static int intel_iommu_superpage = 1; +static int intel_iommu_ecs = 1; + +/* We only actually use ECS when PASID support (on the new bit 40) + * is also advertised. Some early implementations — the ones with + * PASID support on bit 28 — have issues even when we *only* use + * extended root/context tables. */ +#define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap) && \ + ecap_pasid(iommu->ecap)) + +int intel_iommu_gfx_mapped; +EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped); + +#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1)) +static DEFINE_SPINLOCK(device_domain_lock); +static LIST_HEAD(device_domain_list); + +static const struct iommu_ops intel_iommu_ops; + +/* Convert generic 'struct iommu_domain to private struct dmar_domain */ +static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom) +{ + return container_of(dom, struct dmar_domain, domain); +} + +static int __init intel_iommu_setup(char *str) +{ + if (!str) + return -EINVAL; + while (*str) { + if (!strncmp(str, "on", 2)) { + dmar_disabled = 0; + printk(KERN_INFO "Intel-IOMMU: enabled\n"); + } else if (!strncmp(str, "off", 3)) { + dmar_disabled = 1; + printk(KERN_INFO "Intel-IOMMU: disabled\n"); + } else if (!strncmp(str, "igfx_off", 8)) { + dmar_map_gfx = 0; + printk(KERN_INFO + "Intel-IOMMU: disable GFX device mapping\n"); + } else if (!strncmp(str, "forcedac", 8)) { + printk(KERN_INFO + "Intel-IOMMU: Forcing DAC for PCI devices\n"); + dmar_forcedac = 1; + } else if (!strncmp(str, "strict", 6)) { + printk(KERN_INFO + "Intel-IOMMU: disable batched IOTLB flush\n"); + intel_iommu_strict = 1; + } else if (!strncmp(str, "sp_off", 6)) { + printk(KERN_INFO + "Intel-IOMMU: disable supported super page\n"); + intel_iommu_superpage = 0; + } else if (!strncmp(str, "ecs_off", 7)) { + printk(KERN_INFO + "Intel-IOMMU: disable extended context table support\n"); + intel_iommu_ecs = 0; + } + + str += strcspn(str, ","); + while (*str == ',') + str++; + } + return 0; +} +__setup("intel_iommu=", intel_iommu_setup); + +static struct kmem_cache *iommu_domain_cache; +static struct kmem_cache *iommu_devinfo_cache; + +static inline void *alloc_pgtable_page(int node) +{ + struct page *page; + void *vaddr = NULL; + + page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0); + if (page) + vaddr = page_address(page); + return vaddr; +} + +static inline void free_pgtable_page(void *vaddr) +{ + free_page((unsigned long)vaddr); +} + +static inline void *alloc_domain_mem(void) +{ + return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC); +} + +static void free_domain_mem(void *vaddr) +{ + kmem_cache_free(iommu_domain_cache, vaddr); +} + +static inline void * alloc_devinfo_mem(void) +{ + return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC); +} + +static inline void free_devinfo_mem(void *vaddr) +{ + kmem_cache_free(iommu_devinfo_cache, vaddr); +} + +static inline int domain_type_is_vm(struct dmar_domain *domain) +{ + return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE; +} + +static inline int domain_type_is_vm_or_si(struct dmar_domain *domain) +{ + return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE | + DOMAIN_FLAG_STATIC_IDENTITY); +} + +static inline int domain_pfn_supported(struct dmar_domain *domain, + unsigned long pfn) +{ + int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; + + return !(addr_width < BITS_PER_LONG && pfn >> addr_width); +} + +static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw) +{ + unsigned long sagaw; + int agaw = -1; + + sagaw = cap_sagaw(iommu->cap); + for (agaw = width_to_agaw(max_gaw); + agaw >= 0; agaw--) { + if (test_bit(agaw, &sagaw)) + break; + } + + return agaw; +} + +/* + * Calculate max SAGAW for each iommu. + */ +int iommu_calculate_max_sagaw(struct intel_iommu *iommu) +{ + return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH); +} + +/* + * calculate agaw for each iommu. + * "SAGAW" may be different across iommus, use a default agaw, and + * get a supported less agaw for iommus that don't support the default agaw. + */ +int iommu_calculate_agaw(struct intel_iommu *iommu) +{ + return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH); +} + +/* This functionin only returns single iommu in a domain */ +static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain) +{ + int iommu_id; + + /* si_domain and vm domain should not get here. */ + BUG_ON(domain_type_is_vm_or_si(domain)); + iommu_id = find_first_bit(domain->iommu_bmp, g_num_of_iommus); + if (iommu_id < 0 || iommu_id >= g_num_of_iommus) + return NULL; + + return g_iommus[iommu_id]; +} + +static void domain_update_iommu_coherency(struct dmar_domain *domain) +{ + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + bool found = false; + int i; + + domain->iommu_coherency = 1; + + for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) { + found = true; + if (!ecap_coherent(g_iommus[i]->ecap)) { + domain->iommu_coherency = 0; + break; + } + } + if (found) + return; + + /* No hardware attached; use lowest common denominator */ + rcu_read_lock(); + for_each_active_iommu(iommu, drhd) { + if (!ecap_coherent(iommu->ecap)) { + domain->iommu_coherency = 0; + break; + } + } + rcu_read_unlock(); +} + +static int domain_update_iommu_snooping(struct intel_iommu *skip) +{ + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + int ret = 1; + + rcu_read_lock(); + for_each_active_iommu(iommu, drhd) { + if (iommu != skip) { + if (!ecap_sc_support(iommu->ecap)) { + ret = 0; + break; + } + } + } + rcu_read_unlock(); + + return ret; +} + +static int domain_update_iommu_superpage(struct intel_iommu *skip) +{ + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + int mask = 0xf; + + if (!intel_iommu_superpage) { + return 0; + } + + /* set iommu_superpage to the smallest common denominator */ + rcu_read_lock(); + for_each_active_iommu(iommu, drhd) { + if (iommu != skip) { + mask &= cap_super_page_val(iommu->cap); + if (!mask) + break; + } + } + rcu_read_unlock(); + + return fls(mask); +} + +/* Some capabilities may be different across iommus */ +static void domain_update_iommu_cap(struct dmar_domain *domain) +{ + domain_update_iommu_coherency(domain); + domain->iommu_snooping = domain_update_iommu_snooping(NULL); + domain->iommu_superpage = domain_update_iommu_superpage(NULL); +} + +static inline struct context_entry *iommu_context_addr(struct intel_iommu *iommu, + u8 bus, u8 devfn, int alloc) +{ + struct root_entry *root = &iommu->root_entry[bus]; + struct context_entry *context; + u64 *entry; + + if (ecs_enabled(iommu)) { + if (devfn >= 0x80) { + devfn -= 0x80; + entry = &root->hi; + } + devfn *= 2; + } + entry = &root->lo; + if (*entry & 1) + context = phys_to_virt(*entry & VTD_PAGE_MASK); + else { + unsigned long phy_addr; + if (!alloc) + return NULL; + + context = alloc_pgtable_page(iommu->node); + if (!context) + return NULL; + + __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE); + phy_addr = virt_to_phys((void *)context); + *entry = phy_addr | 1; + __iommu_flush_cache(iommu, entry, sizeof(*entry)); + } + return &context[devfn]; +} + +static int iommu_dummy(struct device *dev) +{ + return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO; +} + +static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn) +{ + struct dmar_drhd_unit *drhd = NULL; + struct intel_iommu *iommu; + struct device *tmp; + struct pci_dev *ptmp, *pdev = NULL; + u16 segment = 0; + int i; + + if (iommu_dummy(dev)) + return NULL; + + if (dev_is_pci(dev)) { + pdev = to_pci_dev(dev); + segment = pci_domain_nr(pdev->bus); + } else if (has_acpi_companion(dev)) + dev = &ACPI_COMPANION(dev)->dev; + + rcu_read_lock(); + for_each_active_iommu(iommu, drhd) { + if (pdev && segment != drhd->segment) + continue; + + for_each_active_dev_scope(drhd->devices, + drhd->devices_cnt, i, tmp) { + if (tmp == dev) { + *bus = drhd->devices[i].bus; + *devfn = drhd->devices[i].devfn; + goto out; + } + + if (!pdev || !dev_is_pci(tmp)) + continue; + + ptmp = to_pci_dev(tmp); + if (ptmp->subordinate && + ptmp->subordinate->number <= pdev->bus->number && + ptmp->subordinate->busn_res.end >= pdev->bus->number) + goto got_pdev; + } + + if (pdev && drhd->include_all) { + got_pdev: + *bus = pdev->bus->number; + *devfn = pdev->devfn; + goto out; + } + } + iommu = NULL; + out: + rcu_read_unlock(); + + return iommu; +} + +static void domain_flush_cache(struct dmar_domain *domain, + void *addr, int size) +{ + if (!domain->iommu_coherency) + clflush_cache_range(addr, size); +} + +static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn) +{ + struct context_entry *context; + int ret = 0; + unsigned long flags; + + spin_lock_irqsave(&iommu->lock, flags); + context = iommu_context_addr(iommu, bus, devfn, 0); + if (context) + ret = context_present(context); + spin_unlock_irqrestore(&iommu->lock, flags); + return ret; +} + +static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn) +{ + struct context_entry *context; + unsigned long flags; + + spin_lock_irqsave(&iommu->lock, flags); + context = iommu_context_addr(iommu, bus, devfn, 0); + if (context) { + context_clear_entry(context); + __iommu_flush_cache(iommu, context, sizeof(*context)); + } + spin_unlock_irqrestore(&iommu->lock, flags); +} + +static void free_context_table(struct intel_iommu *iommu) +{ + int i; + unsigned long flags; + struct context_entry *context; + + spin_lock_irqsave(&iommu->lock, flags); + if (!iommu->root_entry) { + goto out; + } + for (i = 0; i < ROOT_ENTRY_NR; i++) { + context = iommu_context_addr(iommu, i, 0, 0); + if (context) + free_pgtable_page(context); + + if (!ecs_enabled(iommu)) + continue; + + context = iommu_context_addr(iommu, i, 0x80, 0); + if (context) + free_pgtable_page(context); + + } + free_pgtable_page(iommu->root_entry); + iommu->root_entry = NULL; +out: + spin_unlock_irqrestore(&iommu->lock, flags); +} + +static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, + unsigned long pfn, int *target_level) +{ + struct dma_pte *parent, *pte = NULL; + int level = agaw_to_level(domain->agaw); + int offset; + + BUG_ON(!domain->pgd); + + if (!domain_pfn_supported(domain, pfn)) + /* Address beyond IOMMU's addressing capabilities. */ + return NULL; + + parent = domain->pgd; + + while (1) { + void *tmp_page; + + offset = pfn_level_offset(pfn, level); + pte = &parent[offset]; + if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte))) + break; + if (level == *target_level) + break; + + if (!dma_pte_present(pte)) { + uint64_t pteval; + + tmp_page = alloc_pgtable_page(domain->nid); + + if (!tmp_page) + return NULL; + + domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE); + pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE; + if (cmpxchg64(&pte->val, 0ULL, pteval)) + /* Someone else set it while we were thinking; use theirs. */ + free_pgtable_page(tmp_page); + else + domain_flush_cache(domain, pte, sizeof(*pte)); + } + if (level == 1) + break; + + parent = phys_to_virt(dma_pte_addr(pte)); + level--; + } + + if (!*target_level) + *target_level = level; + + return pte; +} + + +/* return address's pte at specific level */ +static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain, + unsigned long pfn, + int level, int *large_page) +{ + struct dma_pte *parent, *pte = NULL; + int total = agaw_to_level(domain->agaw); + int offset; + + parent = domain->pgd; + while (level <= total) { + offset = pfn_level_offset(pfn, total); + pte = &parent[offset]; + if (level == total) + return pte; + + if (!dma_pte_present(pte)) { + *large_page = total; + break; + } + + if (dma_pte_superpage(pte)) { + *large_page = total; + return pte; + } + + parent = phys_to_virt(dma_pte_addr(pte)); + total--; + } + return NULL; +} + +/* clear last level pte, a tlb flush should be followed */ +static void dma_pte_clear_range(struct dmar_domain *domain, + unsigned long start_pfn, + unsigned long last_pfn) +{ + unsigned int large_page = 1; + struct dma_pte *first_pte, *pte; + + BUG_ON(!domain_pfn_supported(domain, start_pfn)); + BUG_ON(!domain_pfn_supported(domain, last_pfn)); + BUG_ON(start_pfn > last_pfn); + + /* we don't need lock here; nobody else touches the iova range */ + do { + large_page = 1; + first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page); + if (!pte) { + start_pfn = align_to_level(start_pfn + 1, large_page + 1); + continue; + } + do { + dma_clear_pte(pte); + start_pfn += lvl_to_nr_pages(large_page); + pte++; + } while (start_pfn <= last_pfn && !first_pte_in_page(pte)); + + domain_flush_cache(domain, first_pte, + (void *)pte - (void *)first_pte); + + } while (start_pfn && start_pfn <= last_pfn); +} + +static void dma_pte_free_level(struct dmar_domain *domain, int level, + struct dma_pte *pte, unsigned long pfn, + unsigned long start_pfn, unsigned long last_pfn) +{ + pfn = max(start_pfn, pfn); + pte = &pte[pfn_level_offset(pfn, level)]; + + do { + unsigned long level_pfn; + struct dma_pte *level_pte; + + if (!dma_pte_present(pte) || dma_pte_superpage(pte)) + goto next; + + level_pfn = pfn & level_mask(level - 1); + level_pte = phys_to_virt(dma_pte_addr(pte)); + + if (level > 2) + dma_pte_free_level(domain, level - 1, level_pte, + level_pfn, start_pfn, last_pfn); + + /* If range covers entire pagetable, free it */ + if (!(start_pfn > level_pfn || + last_pfn < level_pfn + level_size(level) - 1)) { + dma_clear_pte(pte); + domain_flush_cache(domain, pte, sizeof(*pte)); + free_pgtable_page(level_pte); + } +next: + pfn += level_size(level); + } while (!first_pte_in_page(++pte) && pfn <= last_pfn); +} + +/* free page table pages. last level pte should already be cleared */ +static void dma_pte_free_pagetable(struct dmar_domain *domain, + unsigned long start_pfn, + unsigned long last_pfn) +{ + BUG_ON(!domain_pfn_supported(domain, start_pfn)); + BUG_ON(!domain_pfn_supported(domain, last_pfn)); + BUG_ON(start_pfn > last_pfn); + + dma_pte_clear_range(domain, start_pfn, last_pfn); + + /* We don't need lock here; nobody else touches the iova range */ + dma_pte_free_level(domain, agaw_to_level(domain->agaw), + domain->pgd, 0, start_pfn, last_pfn); + + /* free pgd */ + if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) { + free_pgtable_page(domain->pgd); + domain->pgd = NULL; + } +} + +/* When a page at a given level is being unlinked from its parent, we don't + need to *modify* it at all. All we need to do is make a list of all the + pages which can be freed just as soon as we've flushed the IOTLB and we + know the hardware page-walk will no longer touch them. + The 'pte' argument is the *parent* PTE, pointing to the page that is to + be freed. */ +static struct page *dma_pte_list_pagetables(struct dmar_domain *domain, + int level, struct dma_pte *pte, + struct page *freelist) +{ + struct page *pg; + + pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT); + pg->freelist = freelist; + freelist = pg; + + if (level == 1) + return freelist; + + pte = page_address(pg); + do { + if (dma_pte_present(pte) && !dma_pte_superpage(pte)) + freelist = dma_pte_list_pagetables(domain, level - 1, + pte, freelist); + pte++; + } while (!first_pte_in_page(pte)); + + return freelist; +} + +static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level, + struct dma_pte *pte, unsigned long pfn, + unsigned long start_pfn, + unsigned long last_pfn, + struct page *freelist) +{ + struct dma_pte *first_pte = NULL, *last_pte = NULL; + + pfn = max(start_pfn, pfn); + pte = &pte[pfn_level_offset(pfn, level)]; + + do { + unsigned long level_pfn; + + if (!dma_pte_present(pte)) + goto next; + + level_pfn = pfn & level_mask(level); + + /* If range covers entire pagetable, free it */ + if (start_pfn <= level_pfn && + last_pfn >= level_pfn + level_size(level) - 1) { + /* These suborbinate page tables are going away entirely. Don't + bother to clear them; we're just going to *free* them. */ + if (level > 1 && !dma_pte_superpage(pte)) + freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist); + + dma_clear_pte(pte); + if (!first_pte) + first_pte = pte; + last_pte = pte; + } else if (level > 1) { + /* Recurse down into a level that isn't *entirely* obsolete */ + freelist = dma_pte_clear_level(domain, level - 1, + phys_to_virt(dma_pte_addr(pte)), + level_pfn, start_pfn, last_pfn, + freelist); + } +next: + pfn += level_size(level); + } while (!first_pte_in_page(++pte) && pfn <= last_pfn); + + if (first_pte) + domain_flush_cache(domain, first_pte, + (void *)++last_pte - (void *)first_pte); + + return freelist; +} + +/* We can't just free the pages because the IOMMU may still be walking + the page tables, and may have cached the intermediate levels. The + pages can only be freed after the IOTLB flush has been done. */ +struct page *domain_unmap(struct dmar_domain *domain, + unsigned long start_pfn, + unsigned long last_pfn) +{ + struct page *freelist = NULL; + + BUG_ON(!domain_pfn_supported(domain, start_pfn)); + BUG_ON(!domain_pfn_supported(domain, last_pfn)); + BUG_ON(start_pfn > last_pfn); + + /* we don't need lock here; nobody else touches the iova range */ + freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw), + domain->pgd, 0, start_pfn, last_pfn, NULL); + + /* free pgd */ + if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) { + struct page *pgd_page = virt_to_page(domain->pgd); + pgd_page->freelist = freelist; + freelist = pgd_page; + + domain->pgd = NULL; + } + + return freelist; +} + +void dma_free_pagelist(struct page *freelist) +{ + struct page *pg; + + while ((pg = freelist)) { + freelist = pg->freelist; + free_pgtable_page(page_address(pg)); + } +} + +/* iommu handling */ +static int iommu_alloc_root_entry(struct intel_iommu *iommu) +{ + struct root_entry *root; + unsigned long flags; + + root = (struct root_entry *)alloc_pgtable_page(iommu->node); + if (!root) { + pr_err("IOMMU: allocating root entry for %s failed\n", + iommu->name); + return -ENOMEM; + } + + __iommu_flush_cache(iommu, root, ROOT_SIZE); + + spin_lock_irqsave(&iommu->lock, flags); + iommu->root_entry = root; + spin_unlock_irqrestore(&iommu->lock, flags); + + return 0; +} + +static void iommu_set_root_entry(struct intel_iommu *iommu) +{ + u64 addr; + u32 sts; + unsigned long flag; + + addr = virt_to_phys(iommu->root_entry); + if (ecs_enabled(iommu)) + addr |= DMA_RTADDR_RTT; + + raw_spin_lock_irqsave(&iommu->register_lock, flag); + dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr); + + writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG); + + /* Make sure hardware complete it */ + IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, + readl, (sts & DMA_GSTS_RTPS), sts); + + raw_spin_unlock_irqrestore(&iommu->register_lock, flag); +} + +static void iommu_flush_write_buffer(struct intel_iommu *iommu) +{ + u32 val; + unsigned long flag; + + if (!rwbf_quirk && !cap_rwbf(iommu->cap)) + return; + + raw_spin_lock_irqsave(&iommu->register_lock, flag); + writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG); + + /* Make sure hardware complete it */ + IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, + readl, (!(val & DMA_GSTS_WBFS)), val); + + raw_spin_unlock_irqrestore(&iommu->register_lock, flag); +} + +/* return value determine if we need a write buffer flush */ +static void __iommu_flush_context(struct intel_iommu *iommu, + u16 did, u16 source_id, u8 function_mask, + u64 type) +{ + u64 val = 0; + unsigned long flag; + + switch (type) { + case DMA_CCMD_GLOBAL_INVL: + val = DMA_CCMD_GLOBAL_INVL; + break; + case DMA_CCMD_DOMAIN_INVL: + val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did); + break; + case DMA_CCMD_DEVICE_INVL: + val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did) + | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask); + break; + default: + BUG(); + } + val |= DMA_CCMD_ICC; + + raw_spin_lock_irqsave(&iommu->register_lock, flag); + dmar_writeq(iommu->reg + DMAR_CCMD_REG, val); + + /* Make sure hardware complete it */ + IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG, + dmar_readq, (!(val & DMA_CCMD_ICC)), val); + + raw_spin_unlock_irqrestore(&iommu->register_lock, flag); +} + +/* return value determine if we need a write buffer flush */ +static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, + u64 addr, unsigned int size_order, u64 type) +{ + int tlb_offset = ecap_iotlb_offset(iommu->ecap); + u64 val = 0, val_iva = 0; + unsigned long flag; + + switch (type) { + case DMA_TLB_GLOBAL_FLUSH: + /* global flush doesn't need set IVA_REG */ + val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT; + break; + case DMA_TLB_DSI_FLUSH: + val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did); + break; + case DMA_TLB_PSI_FLUSH: + val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did); + /* IH bit is passed in as part of address */ + val_iva = size_order | addr; + break; + default: + BUG(); + } + /* Note: set drain read/write */ +#if 0 + /* + * This is probably to be super secure.. Looks like we can + * ignore it without any impact. + */ + if (cap_read_drain(iommu->cap)) + val |= DMA_TLB_READ_DRAIN; +#endif + if (cap_write_drain(iommu->cap)) + val |= DMA_TLB_WRITE_DRAIN; + + raw_spin_lock_irqsave(&iommu->register_lock, flag); + /* Note: Only uses first TLB reg currently */ + if (val_iva) + dmar_writeq(iommu->reg + tlb_offset, val_iva); + dmar_writeq(iommu->reg + tlb_offset + 8, val); + + /* Make sure hardware complete it */ + IOMMU_WAIT_OP(iommu, tlb_offset + 8, + dmar_readq, (!(val & DMA_TLB_IVT)), val); + + raw_spin_unlock_irqrestore(&iommu->register_lock, flag); + + /* check IOTLB invalidation granularity */ + if (DMA_TLB_IAIG(val) == 0) + printk(KERN_ERR"IOMMU: flush IOTLB failed\n"); + if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type)) + pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n", + (unsigned long long)DMA_TLB_IIRG(type), + (unsigned long long)DMA_TLB_IAIG(val)); +} + +static struct device_domain_info * +iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu, + u8 bus, u8 devfn) +{ + bool found = false; + unsigned long flags; + struct device_domain_info *info; + struct pci_dev *pdev; + + if (!ecap_dev_iotlb_support(iommu->ecap)) + return NULL; + + if (!iommu->qi) + return NULL; + + spin_lock_irqsave(&device_domain_lock, flags); + list_for_each_entry(info, &domain->devices, link) + if (info->iommu == iommu && info->bus == bus && + info->devfn == devfn) { + found = true; + break; + } + spin_unlock_irqrestore(&device_domain_lock, flags); + + if (!found || !info->dev || !dev_is_pci(info->dev)) + return NULL; + + pdev = to_pci_dev(info->dev); + + if (!pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS)) + return NULL; + + if (!dmar_find_matched_atsr_unit(pdev)) + return NULL; + + return info; +} + +static void iommu_enable_dev_iotlb(struct device_domain_info *info) +{ + if (!info || !dev_is_pci(info->dev)) + return; + + pci_enable_ats(to_pci_dev(info->dev), VTD_PAGE_SHIFT); +} + +static void iommu_disable_dev_iotlb(struct device_domain_info *info) +{ + if (!info->dev || !dev_is_pci(info->dev) || + !pci_ats_enabled(to_pci_dev(info->dev))) + return; + + pci_disable_ats(to_pci_dev(info->dev)); +} + +static void iommu_flush_dev_iotlb(struct dmar_domain *domain, + u64 addr, unsigned mask) +{ + u16 sid, qdep; + unsigned long flags; + struct device_domain_info *info; + + spin_lock_irqsave(&device_domain_lock, flags); + list_for_each_entry(info, &domain->devices, link) { + struct pci_dev *pdev; + if (!info->dev || !dev_is_pci(info->dev)) + continue; + + pdev = to_pci_dev(info->dev); + if (!pci_ats_enabled(pdev)) + continue; + + sid = info->bus << 8 | info->devfn; + qdep = pci_ats_queue_depth(pdev); + qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask); + } + spin_unlock_irqrestore(&device_domain_lock, flags); +} + +static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, + unsigned long pfn, unsigned int pages, int ih, int map) +{ + unsigned int mask = ilog2(__roundup_pow_of_two(pages)); + uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT; + + BUG_ON(pages == 0); + + if (ih) + ih = 1 << 6; + /* + * Fallback to domain selective flush if no PSI support or the size is + * too big. + * PSI requires page size to be 2 ^ x, and the base address is naturally + * aligned to the size + */ + if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap)) + iommu->flush.flush_iotlb(iommu, did, 0, 0, + DMA_TLB_DSI_FLUSH); + else + iommu->flush.flush_iotlb(iommu, did, addr | ih, mask, + DMA_TLB_PSI_FLUSH); + + /* + * In caching mode, changes of pages from non-present to present require + * flush. However, device IOTLB doesn't need to be flushed in this case. + */ + if (!cap_caching_mode(iommu->cap) || !map) + iommu_flush_dev_iotlb(iommu->domains[did], addr, mask); +} + +static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu) +{ + u32 pmen; + unsigned long flags; + + raw_spin_lock_irqsave(&iommu->register_lock, flags); + pmen = readl(iommu->reg + DMAR_PMEN_REG); + pmen &= ~DMA_PMEN_EPM; + writel(pmen, iommu->reg + DMAR_PMEN_REG); + + /* wait for the protected region status bit to clear */ + IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG, + readl, !(pmen & DMA_PMEN_PRS), pmen); + + raw_spin_unlock_irqrestore(&iommu->register_lock, flags); +} + +static void iommu_enable_translation(struct intel_iommu *iommu) +{ + u32 sts; + unsigned long flags; + + raw_spin_lock_irqsave(&iommu->register_lock, flags); + iommu->gcmd |= DMA_GCMD_TE; + writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); + + /* Make sure hardware complete it */ + IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, + readl, (sts & DMA_GSTS_TES), sts); + + raw_spin_unlock_irqrestore(&iommu->register_lock, flags); +} + +static void iommu_disable_translation(struct intel_iommu *iommu) +{ + u32 sts; + unsigned long flag; + + raw_spin_lock_irqsave(&iommu->register_lock, flag); + iommu->gcmd &= ~DMA_GCMD_TE; + writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); + + /* Make sure hardware complete it */ + IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, + readl, (!(sts & DMA_GSTS_TES)), sts); + + raw_spin_unlock_irqrestore(&iommu->register_lock, flag); +} + + +static int iommu_init_domains(struct intel_iommu *iommu) +{ + unsigned long ndomains; + unsigned long nlongs; + + ndomains = cap_ndoms(iommu->cap); + pr_debug("IOMMU%d: Number of Domains supported <%ld>\n", + iommu->seq_id, ndomains); + nlongs = BITS_TO_LONGS(ndomains); + + spin_lock_init(&iommu->lock); + + /* TBD: there might be 64K domains, + * consider other allocation for future chip + */ + iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL); + if (!iommu->domain_ids) { + pr_err("IOMMU%d: allocating domain id array failed\n", + iommu->seq_id); + return -ENOMEM; + } + iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *), + GFP_KERNEL); + if (!iommu->domains) { + pr_err("IOMMU%d: allocating domain array failed\n", + iommu->seq_id); + kfree(iommu->domain_ids); + iommu->domain_ids = NULL; + return -ENOMEM; + } + + /* + * if Caching mode is set, then invalid translations are tagged + * with domainid 0. Hence we need to pre-allocate it. + */ + if (cap_caching_mode(iommu->cap)) + set_bit(0, iommu->domain_ids); + return 0; +} + +static void disable_dmar_iommu(struct intel_iommu *iommu) +{ + struct dmar_domain *domain; + int i; + + if ((iommu->domains) && (iommu->domain_ids)) { + for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) { + /* + * Domain id 0 is reserved for invalid translation + * if hardware supports caching mode. + */ + if (cap_caching_mode(iommu->cap) && i == 0) + continue; + + domain = iommu->domains[i]; + clear_bit(i, iommu->domain_ids); + if (domain_detach_iommu(domain, iommu) == 0 && + !domain_type_is_vm(domain)) + domain_exit(domain); + } + } + + if (iommu->gcmd & DMA_GCMD_TE) + iommu_disable_translation(iommu); +} + +static void free_dmar_iommu(struct intel_iommu *iommu) +{ + if ((iommu->domains) && (iommu->domain_ids)) { + kfree(iommu->domains); + kfree(iommu->domain_ids); + iommu->domains = NULL; + iommu->domain_ids = NULL; + } + + g_iommus[iommu->seq_id] = NULL; + + /* free context mapping */ + free_context_table(iommu); +} + +static struct dmar_domain *alloc_domain(int flags) +{ + /* domain id for virtual machine, it won't be set in context */ + static atomic_t vm_domid = ATOMIC_INIT(0); + struct dmar_domain *domain; + + domain = alloc_domain_mem(); + if (!domain) + return NULL; + + memset(domain, 0, sizeof(*domain)); + domain->nid = -1; + domain->flags = flags; + spin_lock_init(&domain->iommu_lock); + INIT_LIST_HEAD(&domain->devices); + if (flags & DOMAIN_FLAG_VIRTUAL_MACHINE) + domain->id = atomic_inc_return(&vm_domid); + + return domain; +} + +static int __iommu_attach_domain(struct dmar_domain *domain, + struct intel_iommu *iommu) +{ + int num; + unsigned long ndomains; + + ndomains = cap_ndoms(iommu->cap); + num = find_first_zero_bit(iommu->domain_ids, ndomains); + if (num < ndomains) { + set_bit(num, iommu->domain_ids); + iommu->domains[num] = domain; + } else { + num = -ENOSPC; + } + + return num; +} + +static int iommu_attach_domain(struct dmar_domain *domain, + struct intel_iommu *iommu) +{ + int num; + unsigned long flags; + + spin_lock_irqsave(&iommu->lock, flags); + num = __iommu_attach_domain(domain, iommu); + spin_unlock_irqrestore(&iommu->lock, flags); + if (num < 0) + pr_err("IOMMU: no free domain ids\n"); + + return num; +} + +static int iommu_attach_vm_domain(struct dmar_domain *domain, + struct intel_iommu *iommu) +{ + int num; + unsigned long ndomains; + + ndomains = cap_ndoms(iommu->cap); + for_each_set_bit(num, iommu->domain_ids, ndomains) + if (iommu->domains[num] == domain) + return num; + + return __iommu_attach_domain(domain, iommu); +} + +static void iommu_detach_domain(struct dmar_domain *domain, + struct intel_iommu *iommu) +{ + unsigned long flags; + int num, ndomains; + + spin_lock_irqsave(&iommu->lock, flags); + if (domain_type_is_vm_or_si(domain)) { + ndomains = cap_ndoms(iommu->cap); + for_each_set_bit(num, iommu->domain_ids, ndomains) { + if (iommu->domains[num] == domain) { + clear_bit(num, iommu->domain_ids); + iommu->domains[num] = NULL; + break; + } + } + } else { + clear_bit(domain->id, iommu->domain_ids); + iommu->domains[domain->id] = NULL; + } + spin_unlock_irqrestore(&iommu->lock, flags); +} + +static void domain_attach_iommu(struct dmar_domain *domain, + struct intel_iommu *iommu) +{ + unsigned long flags; + + spin_lock_irqsave(&domain->iommu_lock, flags); + if (!test_and_set_bit(iommu->seq_id, domain->iommu_bmp)) { + domain->iommu_count++; + if (domain->iommu_count == 1) + domain->nid = iommu->node; + domain_update_iommu_cap(domain); + } + spin_unlock_irqrestore(&domain->iommu_lock, flags); +} + +static int domain_detach_iommu(struct dmar_domain *domain, + struct intel_iommu *iommu) +{ + unsigned long flags; + int count = INT_MAX; + + spin_lock_irqsave(&domain->iommu_lock, flags); + if (test_and_clear_bit(iommu->seq_id, domain->iommu_bmp)) { + count = --domain->iommu_count; + domain_update_iommu_cap(domain); + } + spin_unlock_irqrestore(&domain->iommu_lock, flags); + + return count; +} + +static struct iova_domain reserved_iova_list; +static struct lock_class_key reserved_rbtree_key; + +static int dmar_init_reserved_ranges(void) +{ + struct pci_dev *pdev = NULL; + struct iova *iova; + int i; + + init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN, + DMA_32BIT_PFN); + + lockdep_set_class(&reserved_iova_list.iova_rbtree_lock, + &reserved_rbtree_key); + + /* IOAPIC ranges shouldn't be accessed by DMA */ + iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START), + IOVA_PFN(IOAPIC_RANGE_END)); + if (!iova) { + printk(KERN_ERR "Reserve IOAPIC range failed\n"); + return -ENODEV; + } + + /* Reserve all PCI MMIO to avoid peer-to-peer access */ + for_each_pci_dev(pdev) { + struct resource *r; + + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + r = &pdev->resource[i]; + if (!r->flags || !(r->flags & IORESOURCE_MEM)) + continue; + iova = reserve_iova(&reserved_iova_list, + IOVA_PFN(r->start), + IOVA_PFN(r->end)); + if (!iova) { + printk(KERN_ERR "Reserve iova failed\n"); + return -ENODEV; + } + } + } + return 0; +} + +static void domain_reserve_special_ranges(struct dmar_domain *domain) +{ + copy_reserved_iova(&reserved_iova_list, &domain->iovad); +} + +static inline int guestwidth_to_adjustwidth(int gaw) +{ + int agaw; + int r = (gaw - 12) % 9; + + if (r == 0) + agaw = gaw; + else + agaw = gaw + 9 - r; + if (agaw > 64) + agaw = 64; + return agaw; +} + +static int domain_init(struct dmar_domain *domain, int guest_width) +{ + struct intel_iommu *iommu; + int adjust_width, agaw; + unsigned long sagaw; + + init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN, + DMA_32BIT_PFN); + domain_reserve_special_ranges(domain); + + /* calculate AGAW */ + iommu = domain_get_iommu(domain); + if (guest_width > cap_mgaw(iommu->cap)) + guest_width = cap_mgaw(iommu->cap); + domain->gaw = guest_width; + adjust_width = guestwidth_to_adjustwidth(guest_width); + agaw = width_to_agaw(adjust_width); + sagaw = cap_sagaw(iommu->cap); + if (!test_bit(agaw, &sagaw)) { + /* hardware doesn't support it, choose a bigger one */ + pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw); + agaw = find_next_bit(&sagaw, 5, agaw); + if (agaw >= 5) + return -ENODEV; + } + domain->agaw = agaw; + + if (ecap_coherent(iommu->ecap)) + domain->iommu_coherency = 1; + else + domain->iommu_coherency = 0; + + if (ecap_sc_support(iommu->ecap)) + domain->iommu_snooping = 1; + else + domain->iommu_snooping = 0; + + if (intel_iommu_superpage) + domain->iommu_superpage = fls(cap_super_page_val(iommu->cap)); + else + domain->iommu_superpage = 0; + + domain->nid = iommu->node; + + /* always allocate the top pgd */ + domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid); + if (!domain->pgd) + return -ENOMEM; + __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE); + return 0; +} + +static void domain_exit(struct dmar_domain *domain) +{ + struct page *freelist = NULL; + int i; + + /* Domain 0 is reserved, so dont process it */ + if (!domain) + return; + + /* Flush any lazy unmaps that may reference this domain */ + if (!intel_iommu_strict) + flush_unmaps_timeout(0); + + /* remove associated devices */ + domain_remove_dev_info(domain); + + /* destroy iovas */ + put_iova_domain(&domain->iovad); + + freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw)); + + /* clear attached or cached domains */ + rcu_read_lock(); + for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) + iommu_detach_domain(domain, g_iommus[i]); + rcu_read_unlock(); + + dma_free_pagelist(freelist); + + free_domain_mem(domain); +} + +static int domain_context_mapping_one(struct dmar_domain *domain, + struct intel_iommu *iommu, + u8 bus, u8 devfn, int translation) +{ + struct context_entry *context; + unsigned long flags; + struct dma_pte *pgd; + int id; + int agaw; + struct device_domain_info *info = NULL; + + pr_debug("Set context mapping for %02x:%02x.%d\n", + bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); + + BUG_ON(!domain->pgd); + BUG_ON(translation != CONTEXT_TT_PASS_THROUGH && + translation != CONTEXT_TT_MULTI_LEVEL); + + spin_lock_irqsave(&iommu->lock, flags); + context = iommu_context_addr(iommu, bus, devfn, 1); + spin_unlock_irqrestore(&iommu->lock, flags); + if (!context) + return -ENOMEM; + spin_lock_irqsave(&iommu->lock, flags); + if (context_present(context)) { + spin_unlock_irqrestore(&iommu->lock, flags); + return 0; + } + + id = domain->id; + pgd = domain->pgd; + + if (domain_type_is_vm_or_si(domain)) { + if (domain_type_is_vm(domain)) { + id = iommu_attach_vm_domain(domain, iommu); + if (id < 0) { + spin_unlock_irqrestore(&iommu->lock, flags); + pr_err("IOMMU: no free domain ids\n"); + return -EFAULT; + } + } + + /* Skip top levels of page tables for + * iommu which has less agaw than default. + * Unnecessary for PT mode. + */ + if (translation != CONTEXT_TT_PASS_THROUGH) { + for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) { + pgd = phys_to_virt(dma_pte_addr(pgd)); + if (!dma_pte_present(pgd)) { + spin_unlock_irqrestore(&iommu->lock, flags); + return -ENOMEM; + } + } + } + } + + context_set_domain_id(context, id); + + if (translation != CONTEXT_TT_PASS_THROUGH) { + info = iommu_support_dev_iotlb(domain, iommu, bus, devfn); + translation = info ? CONTEXT_TT_DEV_IOTLB : + CONTEXT_TT_MULTI_LEVEL; + } + /* + * In pass through mode, AW must be programmed to indicate the largest + * AGAW value supported by hardware. And ASR is ignored by hardware. + */ + if (unlikely(translation == CONTEXT_TT_PASS_THROUGH)) + context_set_address_width(context, iommu->msagaw); + else { + context_set_address_root(context, virt_to_phys(pgd)); + context_set_address_width(context, iommu->agaw); + } + + context_set_translation_type(context, translation); + context_set_fault_enable(context); + context_set_present(context); + domain_flush_cache(domain, context, sizeof(*context)); + + /* + * It's a non-present to present mapping. If hardware doesn't cache + * non-present entry we only need to flush the write-buffer. If the + * _does_ cache non-present entries, then it does so in the special + * domain #0, which we have to flush: + */ + if (cap_caching_mode(iommu->cap)) { + iommu->flush.flush_context(iommu, 0, + (((u16)bus) << 8) | devfn, + DMA_CCMD_MASK_NOBIT, + DMA_CCMD_DEVICE_INVL); + iommu->flush.flush_iotlb(iommu, id, 0, 0, DMA_TLB_DSI_FLUSH); + } else { + iommu_flush_write_buffer(iommu); + } + iommu_enable_dev_iotlb(info); + spin_unlock_irqrestore(&iommu->lock, flags); + + domain_attach_iommu(domain, iommu); + + return 0; +} + +struct domain_context_mapping_data { + struct dmar_domain *domain; + struct intel_iommu *iommu; + int translation; +}; + +static int domain_context_mapping_cb(struct pci_dev *pdev, + u16 alias, void *opaque) +{ + struct domain_context_mapping_data *data = opaque; + + return domain_context_mapping_one(data->domain, data->iommu, + PCI_BUS_NUM(alias), alias & 0xff, + data->translation); +} + +static int +domain_context_mapping(struct dmar_domain *domain, struct device *dev, + int translation) +{ + struct intel_iommu *iommu; + u8 bus, devfn; + struct domain_context_mapping_data data; + + iommu = device_to_iommu(dev, &bus, &devfn); + if (!iommu) + return -ENODEV; + + if (!dev_is_pci(dev)) + return domain_context_mapping_one(domain, iommu, bus, devfn, + translation); + + data.domain = domain; + data.iommu = iommu; + data.translation = translation; + + return pci_for_each_dma_alias(to_pci_dev(dev), + &domain_context_mapping_cb, &data); +} + +static int domain_context_mapped_cb(struct pci_dev *pdev, + u16 alias, void *opaque) +{ + struct intel_iommu *iommu = opaque; + + return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff); +} + +static int domain_context_mapped(struct device *dev) +{ + struct intel_iommu *iommu; + u8 bus, devfn; + + iommu = device_to_iommu(dev, &bus, &devfn); + if (!iommu) + return -ENODEV; + + if (!dev_is_pci(dev)) + return device_context_mapped(iommu, bus, devfn); + + return !pci_for_each_dma_alias(to_pci_dev(dev), + domain_context_mapped_cb, iommu); +} + +/* Returns a number of VTD pages, but aligned to MM page size */ +static inline unsigned long aligned_nrpages(unsigned long host_addr, + size_t size) +{ + host_addr &= ~PAGE_MASK; + return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT; +} + +/* Return largest possible superpage level for a given mapping */ +static inline int hardware_largepage_caps(struct dmar_domain *domain, + unsigned long iov_pfn, + unsigned long phy_pfn, + unsigned long pages) +{ + int support, level = 1; + unsigned long pfnmerge; + + support = domain->iommu_superpage; + + /* To use a large page, the virtual *and* physical addresses + must be aligned to 2MiB/1GiB/etc. Lower bits set in either + of them will mean we have to use smaller pages. So just + merge them and check both at once. */ + pfnmerge = iov_pfn | phy_pfn; + + while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) { + pages >>= VTD_STRIDE_SHIFT; + if (!pages) + break; + pfnmerge >>= VTD_STRIDE_SHIFT; + level++; + support--; + } + return level; +} + +static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, + struct scatterlist *sg, unsigned long phys_pfn, + unsigned long nr_pages, int prot) +{ + struct dma_pte *first_pte = NULL, *pte = NULL; + phys_addr_t uninitialized_var(pteval); + unsigned long sg_res = 0; + unsigned int largepage_lvl = 0; + unsigned long lvl_pages = 0; + + BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1)); + + if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0) + return -EINVAL; + + prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP; + + if (!sg) { + sg_res = nr_pages; + pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot; + } + + while (nr_pages > 0) { + uint64_t tmp; + + if (!sg_res) { + sg_res = aligned_nrpages(sg->offset, sg->length); + sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset; + sg->dma_length = sg->length; + pteval = page_to_phys(sg_page(sg)) | prot; + phys_pfn = pteval >> VTD_PAGE_SHIFT; + } + + if (!pte) { + largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res); + + first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl); + if (!pte) + return -ENOMEM; + /* It is large page*/ + if (largepage_lvl > 1) { + pteval |= DMA_PTE_LARGE_PAGE; + lvl_pages = lvl_to_nr_pages(largepage_lvl); + /* + * Ensure that old small page tables are + * removed to make room for superpage, + * if they exist. + */ + dma_pte_free_pagetable(domain, iov_pfn, + iov_pfn + lvl_pages - 1); + } else { + pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE; + } + + } + /* We don't need lock here, nobody else + * touches the iova range + */ + tmp = cmpxchg64_local(&pte->val, 0ULL, pteval); + if (tmp) { + static int dumps = 5; + printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n", + iov_pfn, tmp, (unsigned long long)pteval); + if (dumps) { + dumps--; + debug_dma_dump_mappings(NULL); + } + WARN_ON(1); + } + + lvl_pages = lvl_to_nr_pages(largepage_lvl); + + BUG_ON(nr_pages < lvl_pages); + BUG_ON(sg_res < lvl_pages); + + nr_pages -= lvl_pages; + iov_pfn += lvl_pages; + phys_pfn += lvl_pages; + pteval += lvl_pages * VTD_PAGE_SIZE; + sg_res -= lvl_pages; + + /* If the next PTE would be the first in a new page, then we + need to flush the cache on the entries we've just written. + And then we'll need to recalculate 'pte', so clear it and + let it get set again in the if (!pte) block above. + + If we're done (!nr_pages) we need to flush the cache too. + + Also if we've been setting superpages, we may need to + recalculate 'pte' and switch back to smaller pages for the + end of the mapping, if the trailing size is not enough to + use another superpage (i.e. sg_res < lvl_pages). */ + pte++; + if (!nr_pages || first_pte_in_page(pte) || + (largepage_lvl > 1 && sg_res < lvl_pages)) { + domain_flush_cache(domain, first_pte, + (void *)pte - (void *)first_pte); + pte = NULL; + } + + if (!sg_res && nr_pages) + sg = sg_next(sg); + } + return 0; +} + +static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn, + struct scatterlist *sg, unsigned long nr_pages, + int prot) +{ + return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot); +} + +static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn, + unsigned long phys_pfn, unsigned long nr_pages, + int prot) +{ + return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot); +} + +static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn) +{ + if (!iommu) + return; + + clear_context_table(iommu, bus, devfn); + iommu->flush.flush_context(iommu, 0, 0, 0, + DMA_CCMD_GLOBAL_INVL); + iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); +} + +static inline void unlink_domain_info(struct device_domain_info *info) +{ + assert_spin_locked(&device_domain_lock); + list_del(&info->link); + list_del(&info->global); + if (info->dev) + info->dev->archdata.iommu = NULL; +} + +static void domain_remove_dev_info(struct dmar_domain *domain) +{ + struct device_domain_info *info, *tmp; + unsigned long flags; + + spin_lock_irqsave(&device_domain_lock, flags); + list_for_each_entry_safe(info, tmp, &domain->devices, link) { + unlink_domain_info(info); + spin_unlock_irqrestore(&device_domain_lock, flags); + + iommu_disable_dev_iotlb(info); + iommu_detach_dev(info->iommu, info->bus, info->devfn); + + if (domain_type_is_vm(domain)) { + iommu_detach_dependent_devices(info->iommu, info->dev); + domain_detach_iommu(domain, info->iommu); + } + + free_devinfo_mem(info); + spin_lock_irqsave(&device_domain_lock, flags); + } + spin_unlock_irqrestore(&device_domain_lock, flags); +} + +/* + * find_domain + * Note: we use struct device->archdata.iommu stores the info + */ +static struct dmar_domain *find_domain(struct device *dev) +{ + struct device_domain_info *info; + + /* No lock here, assumes no domain exit in normal case */ + info = dev->archdata.iommu; + if (info) + return info->domain; + return NULL; +} + +static inline struct device_domain_info * +dmar_search_domain_by_dev_info(int segment, int bus, int devfn) +{ + struct device_domain_info *info; + + list_for_each_entry(info, &device_domain_list, global) + if (info->iommu->segment == segment && info->bus == bus && + info->devfn == devfn) + return info; + + return NULL; +} + +static struct dmar_domain *dmar_insert_dev_info(struct intel_iommu *iommu, + int bus, int devfn, + struct device *dev, + struct dmar_domain *domain) +{ + struct dmar_domain *found = NULL; + struct device_domain_info *info; + unsigned long flags; + + info = alloc_devinfo_mem(); + if (!info) + return NULL; + + info->bus = bus; + info->devfn = devfn; + info->dev = dev; + info->domain = domain; + info->iommu = iommu; + + spin_lock_irqsave(&device_domain_lock, flags); + if (dev) + found = find_domain(dev); + else { + struct device_domain_info *info2; + info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn); + if (info2) + found = info2->domain; + } + if (found) { + spin_unlock_irqrestore(&device_domain_lock, flags); + free_devinfo_mem(info); + /* Caller must free the original domain */ + return found; + } + + list_add(&info->link, &domain->devices); + list_add(&info->global, &device_domain_list); + if (dev) + dev->archdata.iommu = info; + spin_unlock_irqrestore(&device_domain_lock, flags); + + return domain; +} + +static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque) +{ + *(u16 *)opaque = alias; + return 0; +} + +/* domain is initialized */ +static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw) +{ + struct dmar_domain *domain, *tmp; + struct intel_iommu *iommu; + struct device_domain_info *info; + u16 dma_alias; + unsigned long flags; + u8 bus, devfn; + + domain = find_domain(dev); + if (domain) + return domain; + + iommu = device_to_iommu(dev, &bus, &devfn); + if (!iommu) + return NULL; + + if (dev_is_pci(dev)) { + struct pci_dev *pdev = to_pci_dev(dev); + + pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias); + + spin_lock_irqsave(&device_domain_lock, flags); + info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus), + PCI_BUS_NUM(dma_alias), + dma_alias & 0xff); + if (info) { + iommu = info->iommu; + domain = info->domain; + } + spin_unlock_irqrestore(&device_domain_lock, flags); + + /* DMA alias already has a domain, uses it */ + if (info) + goto found_domain; + } + + /* Allocate and initialize new domain for the device */ + domain = alloc_domain(0); + if (!domain) + return NULL; + domain->id = iommu_attach_domain(domain, iommu); + if (domain->id < 0) { + free_domain_mem(domain); + return NULL; + } + domain_attach_iommu(domain, iommu); + if (domain_init(domain, gaw)) { + domain_exit(domain); + return NULL; + } + + /* register PCI DMA alias device */ + if (dev_is_pci(dev)) { + tmp = dmar_insert_dev_info(iommu, PCI_BUS_NUM(dma_alias), + dma_alias & 0xff, NULL, domain); + + if (!tmp || tmp != domain) { + domain_exit(domain); + domain = tmp; + } + + if (!domain) + return NULL; + } + +found_domain: + tmp = dmar_insert_dev_info(iommu, bus, devfn, dev, domain); + + if (!tmp || tmp != domain) { + domain_exit(domain); + domain = tmp; + } + + return domain; +} + +static int iommu_identity_mapping; +#define IDENTMAP_ALL 1 +#define IDENTMAP_GFX 2 +#define IDENTMAP_AZALIA 4 + +static int iommu_domain_identity_map(struct dmar_domain *domain, + unsigned long long start, + unsigned long long end) +{ + unsigned long first_vpfn = start >> VTD_PAGE_SHIFT; + unsigned long last_vpfn = end >> VTD_PAGE_SHIFT; + + if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn), + dma_to_mm_pfn(last_vpfn))) { + printk(KERN_ERR "IOMMU: reserve iova failed\n"); + return -ENOMEM; + } + + pr_debug("Mapping reserved region %llx-%llx for domain %d\n", + start, end, domain->id); + /* + * RMRR range might have overlap with physical memory range, + * clear it first + */ + dma_pte_clear_range(domain, first_vpfn, last_vpfn); + + return domain_pfn_mapping(domain, first_vpfn, first_vpfn, + last_vpfn - first_vpfn + 1, + DMA_PTE_READ|DMA_PTE_WRITE); +} + +static int iommu_prepare_identity_map(struct device *dev, + unsigned long long start, + unsigned long long end) +{ + struct dmar_domain *domain; + int ret; + + domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH); + if (!domain) + return -ENOMEM; + + /* For _hardware_ passthrough, don't bother. But for software + passthrough, we do it anyway -- it may indicate a memory + range which is reserved in E820, so which didn't get set + up to start with in si_domain */ + if (domain == si_domain && hw_pass_through) { + printk("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n", + dev_name(dev), start, end); + return 0; + } + + printk(KERN_INFO + "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n", + dev_name(dev), start, end); + + if (end < start) { + WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n" + "BIOS vendor: %s; Ver: %s; Product Version: %s\n", + dmi_get_system_info(DMI_BIOS_VENDOR), + dmi_get_system_info(DMI_BIOS_VERSION), + dmi_get_system_info(DMI_PRODUCT_VERSION)); + ret = -EIO; + goto error; + } + + if (end >> agaw_to_width(domain->agaw)) { + WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n" + "BIOS vendor: %s; Ver: %s; Product Version: %s\n", + agaw_to_width(domain->agaw), + dmi_get_system_info(DMI_BIOS_VENDOR), + dmi_get_system_info(DMI_BIOS_VERSION), + dmi_get_system_info(DMI_PRODUCT_VERSION)); + ret = -EIO; + goto error; + } + + ret = iommu_domain_identity_map(domain, start, end); + if (ret) + goto error; + + /* context entry init */ + ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL); + if (ret) + goto error; + + return 0; + + error: + domain_exit(domain); + return ret; +} + +static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr, + struct device *dev) +{ + if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) + return 0; + return iommu_prepare_identity_map(dev, rmrr->base_address, + rmrr->end_address); +} + +#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA +static inline void iommu_prepare_isa(void) +{ + struct pci_dev *pdev; + int ret; + + pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL); + if (!pdev) + return; + + printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n"); + ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1); + + if (ret) + printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; " + "floppy might not work\n"); + + pci_dev_put(pdev); +} +#else +static inline void iommu_prepare_isa(void) +{ + return; +} +#endif /* !CONFIG_INTEL_IOMMU_FLPY_WA */ + +static int md_domain_init(struct dmar_domain *domain, int guest_width); + +static int __init si_domain_init(int hw) +{ + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + int nid, ret = 0; + bool first = true; + + si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY); + if (!si_domain) + return -EFAULT; + + for_each_active_iommu(iommu, drhd) { + ret = iommu_attach_domain(si_domain, iommu); + if (ret < 0) { + domain_exit(si_domain); + return -EFAULT; + } else if (first) { + si_domain->id = ret; + first = false; + } else if (si_domain->id != ret) { + domain_exit(si_domain); + return -EFAULT; + } + domain_attach_iommu(si_domain, iommu); + } + + if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { + domain_exit(si_domain); + return -EFAULT; + } + + pr_debug("IOMMU: identity mapping domain is domain %d\n", + si_domain->id); + + if (hw) + return 0; + + for_each_online_node(nid) { + unsigned long start_pfn, end_pfn; + int i; + + for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) { + ret = iommu_domain_identity_map(si_domain, + PFN_PHYS(start_pfn), PFN_PHYS(end_pfn)); + if (ret) + return ret; + } + } + + return 0; +} + +static int identity_mapping(struct device *dev) +{ + struct device_domain_info *info; + + if (likely(!iommu_identity_mapping)) + return 0; + + info = dev->archdata.iommu; + if (info && info != DUMMY_DEVICE_DOMAIN_INFO) + return (info->domain == si_domain); + + return 0; +} + +static int domain_add_dev_info(struct dmar_domain *domain, + struct device *dev, int translation) +{ + struct dmar_domain *ndomain; + struct intel_iommu *iommu; + u8 bus, devfn; + int ret; + + iommu = device_to_iommu(dev, &bus, &devfn); + if (!iommu) + return -ENODEV; + + ndomain = dmar_insert_dev_info(iommu, bus, devfn, dev, domain); + if (ndomain != domain) + return -EBUSY; + + ret = domain_context_mapping(domain, dev, translation); + if (ret) { + domain_remove_one_dev_info(domain, dev); + return ret; + } + + return 0; +} + +static bool device_has_rmrr(struct device *dev) +{ + struct dmar_rmrr_unit *rmrr; + struct device *tmp; + int i; + + rcu_read_lock(); + for_each_rmrr_units(rmrr) { + /* + * Return TRUE if this RMRR contains the device that + * is passed in. + */ + for_each_active_dev_scope(rmrr->devices, + rmrr->devices_cnt, i, tmp) + if (tmp == dev) { + rcu_read_unlock(); + return true; + } + } + rcu_read_unlock(); + return false; +} + +/* + * There are a couple cases where we need to restrict the functionality of + * devices associated with RMRRs. The first is when evaluating a device for + * identity mapping because problems exist when devices are moved in and out + * of domains and their respective RMRR information is lost. This means that + * a device with associated RMRRs will never be in a "passthrough" domain. + * The second is use of the device through the IOMMU API. This interface + * expects to have full control of the IOVA space for the device. We cannot + * satisfy both the requirement that RMRR access is maintained and have an + * unencumbered IOVA space. We also have no ability to quiesce the device's + * use of the RMRR space or even inform the IOMMU API user of the restriction. + * We therefore prevent devices associated with an RMRR from participating in + * the IOMMU API, which eliminates them from device assignment. + * + * In both cases we assume that PCI USB devices with RMRRs have them largely + * for historical reasons and that the RMRR space is not actively used post + * boot. This exclusion may change if vendors begin to abuse it. + * + * The same exception is made for graphics devices, with the requirement that + * any use of the RMRR regions will be torn down before assigning the device + * to a guest. + */ +static bool device_is_rmrr_locked(struct device *dev) +{ + if (!device_has_rmrr(dev)) + return false; + + if (dev_is_pci(dev)) { + struct pci_dev *pdev = to_pci_dev(dev); + + if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev)) + return false; + } + + return true; +} + +static int iommu_should_identity_map(struct device *dev, int startup) +{ + + if (dev_is_pci(dev)) { + struct pci_dev *pdev = to_pci_dev(dev); + + if (device_is_rmrr_locked(dev)) + return 0; + + if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev)) + return 1; + + if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev)) + return 1; + + if (!(iommu_identity_mapping & IDENTMAP_ALL)) + return 0; + + /* + * We want to start off with all devices in the 1:1 domain, and + * take them out later if we find they can't access all of memory. + * + * However, we can't do this for PCI devices behind bridges, + * because all PCI devices behind the same bridge will end up + * with the same source-id on their transactions. + * + * Practically speaking, we can't change things around for these + * devices at run-time, because we can't be sure there'll be no + * DMA transactions in flight for any of their siblings. + * + * So PCI devices (unless they're on the root bus) as well as + * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of + * the 1:1 domain, just in _case_ one of their siblings turns out + * not to be able to map all of memory. + */ + if (!pci_is_pcie(pdev)) { + if (!pci_is_root_bus(pdev->bus)) + return 0; + if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI) + return 0; + } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE) + return 0; + } else { + if (device_has_rmrr(dev)) + return 0; + } + + /* + * At boot time, we don't yet know if devices will be 64-bit capable. + * Assume that they will — if they turn out not to be, then we can + * take them out of the 1:1 domain later. + */ + if (!startup) { + /* + * If the device's dma_mask is less than the system's memory + * size then this is not a candidate for identity mapping. + */ + u64 dma_mask = *dev->dma_mask; + + if (dev->coherent_dma_mask && + dev->coherent_dma_mask < dma_mask) + dma_mask = dev->coherent_dma_mask; + + return dma_mask >= dma_get_required_mask(dev); + } + + return 1; +} + +static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw) +{ + int ret; + + if (!iommu_should_identity_map(dev, 1)) + return 0; + + ret = domain_add_dev_info(si_domain, dev, + hw ? CONTEXT_TT_PASS_THROUGH : + CONTEXT_TT_MULTI_LEVEL); + if (!ret) + pr_info("IOMMU: %s identity mapping for device %s\n", + hw ? "hardware" : "software", dev_name(dev)); + else if (ret == -ENODEV) + /* device not associated with an iommu */ + ret = 0; + + return ret; +} + + +static int __init iommu_prepare_static_identity_mapping(int hw) +{ + struct pci_dev *pdev = NULL; + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + struct device *dev; + int i; + int ret = 0; + + ret = si_domain_init(hw); + if (ret) + return -EFAULT; + + for_each_pci_dev(pdev) { + ret = dev_prepare_static_identity_mapping(&pdev->dev, hw); + if (ret) + return ret; + } + + for_each_active_iommu(iommu, drhd) + for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) { + struct acpi_device_physical_node *pn; + struct acpi_device *adev; + + if (dev->bus != &acpi_bus_type) + continue; + + adev= to_acpi_device(dev); + mutex_lock(&adev->physical_node_lock); + list_for_each_entry(pn, &adev->physical_node_list, node) { + ret = dev_prepare_static_identity_mapping(pn->dev, hw); + if (ret) + break; + } + mutex_unlock(&adev->physical_node_lock); + if (ret) + return ret; + } + + return 0; +} + +static void intel_iommu_init_qi(struct intel_iommu *iommu) +{ + /* + * Start from the sane iommu hardware state. + * If the queued invalidation is already initialized by us + * (for example, while enabling interrupt-remapping) then + * we got the things already rolling from a sane state. + */ + if (!iommu->qi) { + /* + * Clear any previous faults. + */ + dmar_fault(-1, iommu); + /* + * Disable queued invalidation if supported and already enabled + * before OS handover. + */ + dmar_disable_qi(iommu); + } + + if (dmar_enable_qi(iommu)) { + /* + * Queued Invalidate not enabled, use Register Based Invalidate + */ + iommu->flush.flush_context = __iommu_flush_context; + iommu->flush.flush_iotlb = __iommu_flush_iotlb; + pr_info("IOMMU: %s using Register based invalidation\n", + iommu->name); + } else { + iommu->flush.flush_context = qi_flush_context; + iommu->flush.flush_iotlb = qi_flush_iotlb; + pr_info("IOMMU: %s using Queued invalidation\n", iommu->name); + } +} + +static int __init init_dmars(void) +{ + struct dmar_drhd_unit *drhd; + struct dmar_rmrr_unit *rmrr; + struct device *dev; + struct intel_iommu *iommu; + int i, ret; + + /* + * for each drhd + * allocate root + * initialize and program root entry to not present + * endfor + */ + for_each_drhd_unit(drhd) { + /* + * lock not needed as this is only incremented in the single + * threaded kernel __init code path all other access are read + * only + */ + if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) { + g_num_of_iommus++; + continue; + } + printk_once(KERN_ERR "intel-iommu: exceeded %d IOMMUs\n", + DMAR_UNITS_SUPPORTED); + } + + /* Preallocate enough resources for IOMMU hot-addition */ + if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) + g_num_of_iommus = DMAR_UNITS_SUPPORTED; + + g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *), + GFP_KERNEL); + if (!g_iommus) { + printk(KERN_ERR "Allocating global iommu array failed\n"); + ret = -ENOMEM; + goto error; + } + + deferred_flush = kzalloc(g_num_of_iommus * + sizeof(struct deferred_flush_tables), GFP_KERNEL); + if (!deferred_flush) { + ret = -ENOMEM; + goto free_g_iommus; + } + + for_each_active_iommu(iommu, drhd) { + g_iommus[iommu->seq_id] = iommu; + + ret = iommu_init_domains(iommu); + if (ret) + goto free_iommu; + + /* + * TBD: + * we could share the same root & context tables + * among all IOMMU's. Need to Split it later. + */ + ret = iommu_alloc_root_entry(iommu); + if (ret) + goto free_iommu; + if (!ecap_pass_through(iommu->ecap)) + hw_pass_through = 0; + } + + for_each_active_iommu(iommu, drhd) + intel_iommu_init_qi(iommu); + + if (iommu_pass_through) + iommu_identity_mapping |= IDENTMAP_ALL; + +#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA + iommu_identity_mapping |= IDENTMAP_GFX; +#endif + + check_tylersburg_isoch(); + + /* + * If pass through is not set or not enabled, setup context entries for + * identity mappings for rmrr, gfx, and isa and may fall back to static + * identity mapping if iommu_identity_mapping is set. + */ + if (iommu_identity_mapping) { + ret = iommu_prepare_static_identity_mapping(hw_pass_through); + if (ret) { + printk(KERN_CRIT "Failed to setup IOMMU pass-through\n"); + goto free_iommu; + } + } + /* + * For each rmrr + * for each dev attached to rmrr + * do + * locate drhd for dev, alloc domain for dev + * allocate free domain + * allocate page table entries for rmrr + * if context not allocated for bus + * allocate and init context + * set present in root table for this bus + * init context with domain, translation etc + * endfor + * endfor + */ + printk(KERN_INFO "IOMMU: Setting RMRR:\n"); + for_each_rmrr_units(rmrr) { + /* some BIOS lists non-exist devices in DMAR table. */ + for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt, + i, dev) { + ret = iommu_prepare_rmrr_dev(rmrr, dev); + if (ret) + printk(KERN_ERR + "IOMMU: mapping reserved region failed\n"); + } + } + + iommu_prepare_isa(); + + /* + * for each drhd + * enable fault log + * global invalidate context cache + * global invalidate iotlb + * enable translation + */ + for_each_iommu(iommu, drhd) { + if (drhd->ignored) { + /* + * we always have to disable PMRs or DMA may fail on + * this device + */ + if (force_on) + iommu_disable_protect_mem_regions(iommu); + continue; + } + + iommu_flush_write_buffer(iommu); + + ret = dmar_set_interrupt(iommu); + if (ret) + goto free_iommu; + + iommu_set_root_entry(iommu); + + iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL); + iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); + iommu_enable_translation(iommu); + iommu_disable_protect_mem_regions(iommu); + } + + return 0; + +free_iommu: + for_each_active_iommu(iommu, drhd) { + disable_dmar_iommu(iommu); + free_dmar_iommu(iommu); + } + kfree(deferred_flush); +free_g_iommus: + kfree(g_iommus); +error: + return ret; +} + +/* This takes a number of _MM_ pages, not VTD pages */ +static struct iova *intel_alloc_iova(struct device *dev, + struct dmar_domain *domain, + unsigned long nrpages, uint64_t dma_mask) +{ + struct iova *iova = NULL; + + /* Restrict dma_mask to the width that the iommu can handle */ + dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask); + + if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) { + /* + * First try to allocate an io virtual address in + * DMA_BIT_MASK(32) and if that fails then try allocating + * from higher range + */ + iova = alloc_iova(&domain->iovad, nrpages, + IOVA_PFN(DMA_BIT_MASK(32)), 1); + if (iova) + return iova; + } + iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1); + if (unlikely(!iova)) { + printk(KERN_ERR "Allocating %ld-page iova for %s failed", + nrpages, dev_name(dev)); + return NULL; + } + + return iova; +} + +static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev) +{ + struct dmar_domain *domain; + int ret; + + domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH); + if (!domain) { + printk(KERN_ERR "Allocating domain for %s failed", + dev_name(dev)); + return NULL; + } + + /* make sure context mapping is ok */ + if (unlikely(!domain_context_mapped(dev))) { + ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL); + if (ret) { + printk(KERN_ERR "Domain context map for %s failed", + dev_name(dev)); + return NULL; + } + } + + return domain; +} + +static inline struct dmar_domain *get_valid_domain_for_dev(struct device *dev) +{ + struct device_domain_info *info; + + /* No lock here, assumes no domain exit in normal case */ + info = dev->archdata.iommu; + if (likely(info)) + return info->domain; + + return __get_valid_domain_for_dev(dev); +} + +/* Check if the dev needs to go through non-identity map and unmap process.*/ +static int iommu_no_mapping(struct device *dev) +{ + int found; + + if (iommu_dummy(dev)) + return 1; + + if (!iommu_identity_mapping) + return 0; + + found = identity_mapping(dev); + if (found) { + if (iommu_should_identity_map(dev, 0)) + return 1; + else { + /* + * 32 bit DMA is removed from si_domain and fall back + * to non-identity mapping. + */ + domain_remove_one_dev_info(si_domain, dev); + printk(KERN_INFO "32bit %s uses non-identity mapping\n", + dev_name(dev)); + return 0; + } + } else { + /* + * In case of a detached 64 bit DMA device from vm, the device + * is put into si_domain for identity mapping. + */ + if (iommu_should_identity_map(dev, 0)) { + int ret; + ret = domain_add_dev_info(si_domain, dev, + hw_pass_through ? + CONTEXT_TT_PASS_THROUGH : + CONTEXT_TT_MULTI_LEVEL); + if (!ret) { + printk(KERN_INFO "64bit %s uses identity mapping\n", + dev_name(dev)); + return 1; + } + } + } + + return 0; +} + +static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr, + size_t size, int dir, u64 dma_mask) +{ + struct dmar_domain *domain; + phys_addr_t start_paddr; + struct iova *iova; + int prot = 0; + int ret; + struct intel_iommu *iommu; + unsigned long paddr_pfn = paddr >> PAGE_SHIFT; + + BUG_ON(dir == DMA_NONE); + + if (iommu_no_mapping(dev)) + return paddr; + + domain = get_valid_domain_for_dev(dev); + if (!domain) + return 0; + + iommu = domain_get_iommu(domain); + size = aligned_nrpages(paddr, size); + + iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask); + if (!iova) + goto error; + + /* + * Check if DMAR supports zero-length reads on write only + * mappings.. + */ + if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \ + !cap_zlr(iommu->cap)) + prot |= DMA_PTE_READ; + if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) + prot |= DMA_PTE_WRITE; + /* + * paddr - (paddr + size) might be partial page, we should map the whole + * page. Note: if two part of one page are separately mapped, we + * might have two guest_addr mapping to the same host paddr, but this + * is not a big problem + */ + ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo), + mm_to_dma_pfn(paddr_pfn), size, prot); + if (ret) + goto error; + + /* it's a non-present to present mapping. Only flush if caching mode */ + if (cap_caching_mode(iommu->cap)) + iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 0, 1); + else + iommu_flush_write_buffer(iommu); + + start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT; + start_paddr += paddr & ~PAGE_MASK; + return start_paddr; + +error: + if (iova) + __free_iova(&domain->iovad, iova); + printk(KERN_ERR"Device %s request: %zx@%llx dir %d --- failed\n", + dev_name(dev), size, (unsigned long long)paddr, dir); + return 0; +} + +static dma_addr_t intel_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + return __intel_map_single(dev, page_to_phys(page) + offset, size, + dir, *dev->dma_mask); +} + +static void flush_unmaps(void) +{ + int i, j; + + timer_on = 0; + + /* just flush them all */ + for (i = 0; i < g_num_of_iommus; i++) { + struct intel_iommu *iommu = g_iommus[i]; + if (!iommu) + continue; + + if (!deferred_flush[i].next) + continue; + + /* In caching mode, global flushes turn emulation expensive */ + if (!cap_caching_mode(iommu->cap)) + iommu->flush.flush_iotlb(iommu, 0, 0, 0, + DMA_TLB_GLOBAL_FLUSH); + for (j = 0; j < deferred_flush[i].next; j++) { + unsigned long mask; + struct iova *iova = deferred_flush[i].iova[j]; + struct dmar_domain *domain = deferred_flush[i].domain[j]; + + /* On real hardware multiple invalidations are expensive */ + if (cap_caching_mode(iommu->cap)) + iommu_flush_iotlb_psi(iommu, domain->id, + iova->pfn_lo, iova_size(iova), + !deferred_flush[i].freelist[j], 0); + else { + mask = ilog2(mm_to_dma_pfn(iova_size(iova))); + iommu_flush_dev_iotlb(deferred_flush[i].domain[j], + (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask); + } + __free_iova(&deferred_flush[i].domain[j]->iovad, iova); + if (deferred_flush[i].freelist[j]) + dma_free_pagelist(deferred_flush[i].freelist[j]); + } + deferred_flush[i].next = 0; + } + + list_size = 0; +} + +static void flush_unmaps_timeout(unsigned long data) +{ + unsigned long flags; + + spin_lock_irqsave(&async_umap_flush_lock, flags); + flush_unmaps(); + spin_unlock_irqrestore(&async_umap_flush_lock, flags); +} + +static void add_unmap(struct dmar_domain *dom, struct iova *iova, struct page *freelist) +{ + unsigned long flags; + int next, iommu_id; + struct intel_iommu *iommu; + + spin_lock_irqsave(&async_umap_flush_lock, flags); + if (list_size == HIGH_WATER_MARK) + flush_unmaps(); + + iommu = domain_get_iommu(dom); + iommu_id = iommu->seq_id; + + next = deferred_flush[iommu_id].next; + deferred_flush[iommu_id].domain[next] = dom; + deferred_flush[iommu_id].iova[next] = iova; + deferred_flush[iommu_id].freelist[next] = freelist; + deferred_flush[iommu_id].next++; + + if (!timer_on) { + mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10)); + timer_on = 1; + } + list_size++; + spin_unlock_irqrestore(&async_umap_flush_lock, flags); +} + +static void intel_unmap(struct device *dev, dma_addr_t dev_addr) +{ + struct dmar_domain *domain; + unsigned long start_pfn, last_pfn; + struct iova *iova; + struct intel_iommu *iommu; + struct page *freelist; + + if (iommu_no_mapping(dev)) + return; + + domain = find_domain(dev); + BUG_ON(!domain); + + iommu = domain_get_iommu(domain); + + iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr)); + if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n", + (unsigned long long)dev_addr)) + return; + + start_pfn = mm_to_dma_pfn(iova->pfn_lo); + last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1; + + pr_debug("Device %s unmapping: pfn %lx-%lx\n", + dev_name(dev), start_pfn, last_pfn); + + freelist = domain_unmap(domain, start_pfn, last_pfn); + + if (intel_iommu_strict) { + iommu_flush_iotlb_psi(iommu, domain->id, start_pfn, + last_pfn - start_pfn + 1, !freelist, 0); + /* free iova */ + __free_iova(&domain->iovad, iova); + dma_free_pagelist(freelist); + } else { + add_unmap(domain, iova, freelist); + /* + * queue up the release of the unmap to save the 1/6th of the + * cpu used up by the iotlb flush operation... + */ + } +} + +static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + intel_unmap(dev, dev_addr); +} + +static void *intel_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flags, + struct dma_attrs *attrs) +{ + struct page *page = NULL; + int order; + + size = PAGE_ALIGN(size); + order = get_order(size); + + if (!iommu_no_mapping(dev)) + flags &= ~(GFP_DMA | GFP_DMA32); + else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) { + if (dev->coherent_dma_mask < DMA_BIT_MASK(32)) + flags |= GFP_DMA; + else + flags |= GFP_DMA32; + } + + if (flags & __GFP_WAIT) { + unsigned int count = size >> PAGE_SHIFT; + + page = dma_alloc_from_contiguous(dev, count, order); + if (page && iommu_no_mapping(dev) && + page_to_phys(page) + size > dev->coherent_dma_mask) { + dma_release_from_contiguous(dev, page, count); + page = NULL; + } + } + + if (!page) + page = alloc_pages(flags, order); + if (!page) + return NULL; + memset(page_address(page), 0, size); + + *dma_handle = __intel_map_single(dev, page_to_phys(page), size, + DMA_BIDIRECTIONAL, + dev->coherent_dma_mask); + if (*dma_handle) + return page_address(page); + if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT)) + __free_pages(page, order); + + return NULL; +} + +static void intel_free_coherent(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_handle, struct dma_attrs *attrs) +{ + int order; + struct page *page = virt_to_page(vaddr); + + size = PAGE_ALIGN(size); + order = get_order(size); + + intel_unmap(dev, dma_handle); + if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT)) + __free_pages(page, order); +} + +static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist, + int nelems, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + intel_unmap(dev, sglist[0].dma_address); +} + +static int intel_nontranslate_map_sg(struct device *hddev, + struct scatterlist *sglist, int nelems, int dir) +{ + int i; + struct scatterlist *sg; + + for_each_sg(sglist, sg, nelems, i) { + BUG_ON(!sg_page(sg)); + sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset; + sg->dma_length = sg->length; + } + return nelems; +} + +static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems, + enum dma_data_direction dir, struct dma_attrs *attrs) +{ + int i; + struct dmar_domain *domain; + size_t size = 0; + int prot = 0; + struct iova *iova = NULL; + int ret; + struct scatterlist *sg; + unsigned long start_vpfn; + struct intel_iommu *iommu; + + BUG_ON(dir == DMA_NONE); + if (iommu_no_mapping(dev)) + return intel_nontranslate_map_sg(dev, sglist, nelems, dir); + + domain = get_valid_domain_for_dev(dev); + if (!domain) + return 0; + + iommu = domain_get_iommu(domain); + + for_each_sg(sglist, sg, nelems, i) + size += aligned_nrpages(sg->offset, sg->length); + + iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), + *dev->dma_mask); + if (!iova) { + sglist->dma_length = 0; + return 0; + } + + /* + * Check if DMAR supports zero-length reads on write only + * mappings.. + */ + if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \ + !cap_zlr(iommu->cap)) + prot |= DMA_PTE_READ; + if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) + prot |= DMA_PTE_WRITE; + + start_vpfn = mm_to_dma_pfn(iova->pfn_lo); + + ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot); + if (unlikely(ret)) { + dma_pte_free_pagetable(domain, start_vpfn, + start_vpfn + size - 1); + __free_iova(&domain->iovad, iova); + return 0; + } + + /* it's a non-present to present mapping. Only flush if caching mode */ + if (cap_caching_mode(iommu->cap)) + iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 0, 1); + else + iommu_flush_write_buffer(iommu); + + return nelems; +} + +static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + return !dma_addr; +} + +struct dma_map_ops intel_dma_ops = { + .alloc = intel_alloc_coherent, + .free = intel_free_coherent, + .map_sg = intel_map_sg, + .unmap_sg = intel_unmap_sg, + .map_page = intel_map_page, + .unmap_page = intel_unmap_page, + .mapping_error = intel_mapping_error, +}; + +static inline int iommu_domain_cache_init(void) +{ + int ret = 0; + + iommu_domain_cache = kmem_cache_create("iommu_domain", + sizeof(struct dmar_domain), + 0, + SLAB_HWCACHE_ALIGN, + + NULL); + if (!iommu_domain_cache) { + printk(KERN_ERR "Couldn't create iommu_domain cache\n"); + ret = -ENOMEM; + } + + return ret; +} + +static inline int iommu_devinfo_cache_init(void) +{ + int ret = 0; + + iommu_devinfo_cache = kmem_cache_create("iommu_devinfo", + sizeof(struct device_domain_info), + 0, + SLAB_HWCACHE_ALIGN, + NULL); + if (!iommu_devinfo_cache) { + printk(KERN_ERR "Couldn't create devinfo cache\n"); + ret = -ENOMEM; + } + + return ret; +} + +static int __init iommu_init_mempool(void) +{ + int ret; + ret = iommu_iova_cache_init(); + if (ret) + return ret; + + ret = iommu_domain_cache_init(); + if (ret) + goto domain_error; + + ret = iommu_devinfo_cache_init(); + if (!ret) + return ret; + + kmem_cache_destroy(iommu_domain_cache); +domain_error: + iommu_iova_cache_destroy(); + + return -ENOMEM; +} + +static void __init iommu_exit_mempool(void) +{ + kmem_cache_destroy(iommu_devinfo_cache); + kmem_cache_destroy(iommu_domain_cache); + iommu_iova_cache_destroy(); +} + +static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev) +{ + struct dmar_drhd_unit *drhd; + u32 vtbar; + int rc; + + /* We know that this device on this chipset has its own IOMMU. + * If we find it under a different IOMMU, then the BIOS is lying + * to us. Hope that the IOMMU for this device is actually + * disabled, and it needs no translation... + */ + rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar); + if (rc) { + /* "can't" happen */ + dev_info(&pdev->dev, "failed to run vt-d quirk\n"); + return; + } + vtbar &= 0xffff0000; + + /* we know that the this iommu should be at offset 0xa000 from vtbar */ + drhd = dmar_find_matched_drhd_unit(pdev); + if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000, + TAINT_FIRMWARE_WORKAROUND, + "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n")) + pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO; +} +DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu); + +static void __init init_no_remapping_devices(void) +{ + struct dmar_drhd_unit *drhd; + struct device *dev; + int i; + + for_each_drhd_unit(drhd) { + if (!drhd->include_all) { + for_each_active_dev_scope(drhd->devices, + drhd->devices_cnt, i, dev) + break; + /* ignore DMAR unit if no devices exist */ + if (i == drhd->devices_cnt) + drhd->ignored = 1; + } + } + + for_each_active_drhd_unit(drhd) { + if (drhd->include_all) + continue; + + for_each_active_dev_scope(drhd->devices, + drhd->devices_cnt, i, dev) + if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev))) + break; + if (i < drhd->devices_cnt) + continue; + + /* This IOMMU has *only* gfx devices. Either bypass it or + set the gfx_mapped flag, as appropriate */ + if (dmar_map_gfx) { + intel_iommu_gfx_mapped = 1; + } else { + drhd->ignored = 1; + for_each_active_dev_scope(drhd->devices, + drhd->devices_cnt, i, dev) + dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO; + } + } +} + +#ifdef CONFIG_SUSPEND +static int init_iommu_hw(void) +{ + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu = NULL; + + for_each_active_iommu(iommu, drhd) + if (iommu->qi) + dmar_reenable_qi(iommu); + + for_each_iommu(iommu, drhd) { + if (drhd->ignored) { + /* + * we always have to disable PMRs or DMA may fail on + * this device + */ + if (force_on) + iommu_disable_protect_mem_regions(iommu); + continue; + } + + iommu_flush_write_buffer(iommu); + + iommu_set_root_entry(iommu); + + iommu->flush.flush_context(iommu, 0, 0, 0, + DMA_CCMD_GLOBAL_INVL); + iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); + iommu_enable_translation(iommu); + iommu_disable_protect_mem_regions(iommu); + } + + return 0; +} + +static void iommu_flush_all(void) +{ + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + + for_each_active_iommu(iommu, drhd) { + iommu->flush.flush_context(iommu, 0, 0, 0, + DMA_CCMD_GLOBAL_INVL); + iommu->flush.flush_iotlb(iommu, 0, 0, 0, + DMA_TLB_GLOBAL_FLUSH); + } +} + +static int iommu_suspend(void) +{ + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu = NULL; + unsigned long flag; + + for_each_active_iommu(iommu, drhd) { + iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS, + GFP_ATOMIC); + if (!iommu->iommu_state) + goto nomem; + } + + iommu_flush_all(); + + for_each_active_iommu(iommu, drhd) { + iommu_disable_translation(iommu); + + raw_spin_lock_irqsave(&iommu->register_lock, flag); + + iommu->iommu_state[SR_DMAR_FECTL_REG] = + readl(iommu->reg + DMAR_FECTL_REG); + iommu->iommu_state[SR_DMAR_FEDATA_REG] = + readl(iommu->reg + DMAR_FEDATA_REG); + iommu->iommu_state[SR_DMAR_FEADDR_REG] = + readl(iommu->reg + DMAR_FEADDR_REG); + iommu->iommu_state[SR_DMAR_FEUADDR_REG] = + readl(iommu->reg + DMAR_FEUADDR_REG); + + raw_spin_unlock_irqrestore(&iommu->register_lock, flag); + } + return 0; + +nomem: + for_each_active_iommu(iommu, drhd) + kfree(iommu->iommu_state); + + return -ENOMEM; +} + +static void iommu_resume(void) +{ + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu = NULL; + unsigned long flag; + + if (init_iommu_hw()) { + if (force_on) + panic("tboot: IOMMU setup failed, DMAR can not resume!\n"); + else + WARN(1, "IOMMU setup failed, DMAR can not resume!\n"); + return; + } + + for_each_active_iommu(iommu, drhd) { + + raw_spin_lock_irqsave(&iommu->register_lock, flag); + + writel(iommu->iommu_state[SR_DMAR_FECTL_REG], + iommu->reg + DMAR_FECTL_REG); + writel(iommu->iommu_state[SR_DMAR_FEDATA_REG], + iommu->reg + DMAR_FEDATA_REG); + writel(iommu->iommu_state[SR_DMAR_FEADDR_REG], + iommu->reg + DMAR_FEADDR_REG); + writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG], + iommu->reg + DMAR_FEUADDR_REG); + + raw_spin_unlock_irqrestore(&iommu->register_lock, flag); + } + + for_each_active_iommu(iommu, drhd) + kfree(iommu->iommu_state); +} + +static struct syscore_ops iommu_syscore_ops = { + .resume = iommu_resume, + .suspend = iommu_suspend, +}; + +static void __init init_iommu_pm_ops(void) +{ + register_syscore_ops(&iommu_syscore_ops); +} + +#else +static inline void init_iommu_pm_ops(void) {} +#endif /* CONFIG_PM */ + + +int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg) +{ + struct acpi_dmar_reserved_memory *rmrr; + struct dmar_rmrr_unit *rmrru; + + rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL); + if (!rmrru) + return -ENOMEM; + + rmrru->hdr = header; + rmrr = (struct acpi_dmar_reserved_memory *)header; + rmrru->base_address = rmrr->base_address; + rmrru->end_address = rmrr->end_address; + rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1), + ((void *)rmrr) + rmrr->header.length, + &rmrru->devices_cnt); + if (rmrru->devices_cnt && rmrru->devices == NULL) { + kfree(rmrru); + return -ENOMEM; + } + + list_add(&rmrru->list, &dmar_rmrr_units); + + return 0; +} + +static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr) +{ + struct dmar_atsr_unit *atsru; + struct acpi_dmar_atsr *tmp; + + list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) { + tmp = (struct acpi_dmar_atsr *)atsru->hdr; + if (atsr->segment != tmp->segment) + continue; + if (atsr->header.length != tmp->header.length) + continue; + if (memcmp(atsr, tmp, atsr->header.length) == 0) + return atsru; + } + + return NULL; +} + +int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg) +{ + struct acpi_dmar_atsr *atsr; + struct dmar_atsr_unit *atsru; + + if (system_state != SYSTEM_BOOTING && !intel_iommu_enabled) + return 0; + + atsr = container_of(hdr, struct acpi_dmar_atsr, header); + atsru = dmar_find_atsr(atsr); + if (atsru) + return 0; + + atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL); + if (!atsru) + return -ENOMEM; + + /* + * If memory is allocated from slab by ACPI _DSM method, we need to + * copy the memory content because the memory buffer will be freed + * on return. + */ + atsru->hdr = (void *)(atsru + 1); + memcpy(atsru->hdr, hdr, hdr->length); + atsru->include_all = atsr->flags & 0x1; + if (!atsru->include_all) { + atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1), + (void *)atsr + atsr->header.length, + &atsru->devices_cnt); + if (atsru->devices_cnt && atsru->devices == NULL) { + kfree(atsru); + return -ENOMEM; + } + } + + list_add_rcu(&atsru->list, &dmar_atsr_units); + + return 0; +} + +static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru) +{ + dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt); + kfree(atsru); +} + +int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg) +{ + struct acpi_dmar_atsr *atsr; + struct dmar_atsr_unit *atsru; + + atsr = container_of(hdr, struct acpi_dmar_atsr, header); + atsru = dmar_find_atsr(atsr); + if (atsru) { + list_del_rcu(&atsru->list); + synchronize_rcu(); + intel_iommu_free_atsr(atsru); + } + + return 0; +} + +int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg) +{ + int i; + struct device *dev; + struct acpi_dmar_atsr *atsr; + struct dmar_atsr_unit *atsru; + + atsr = container_of(hdr, struct acpi_dmar_atsr, header); + atsru = dmar_find_atsr(atsr); + if (!atsru) + return 0; + + if (!atsru->include_all && atsru->devices && atsru->devices_cnt) + for_each_active_dev_scope(atsru->devices, atsru->devices_cnt, + i, dev) + return -EBUSY; + + return 0; +} + +static int intel_iommu_add(struct dmar_drhd_unit *dmaru) +{ + int sp, ret = 0; + struct intel_iommu *iommu = dmaru->iommu; + + if (g_iommus[iommu->seq_id]) + return 0; + + if (hw_pass_through && !ecap_pass_through(iommu->ecap)) { + pr_warn("IOMMU: %s doesn't support hardware pass through.\n", + iommu->name); + return -ENXIO; + } + if (!ecap_sc_support(iommu->ecap) && + domain_update_iommu_snooping(iommu)) { + pr_warn("IOMMU: %s doesn't support snooping.\n", + iommu->name); + return -ENXIO; + } + sp = domain_update_iommu_superpage(iommu) - 1; + if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) { + pr_warn("IOMMU: %s doesn't support large page.\n", + iommu->name); + return -ENXIO; + } + + /* + * Disable translation if already enabled prior to OS handover. + */ + if (iommu->gcmd & DMA_GCMD_TE) + iommu_disable_translation(iommu); + + g_iommus[iommu->seq_id] = iommu; + ret = iommu_init_domains(iommu); + if (ret == 0) + ret = iommu_alloc_root_entry(iommu); + if (ret) + goto out; + + if (dmaru->ignored) { + /* + * we always have to disable PMRs or DMA may fail on this device + */ + if (force_on) + iommu_disable_protect_mem_regions(iommu); + return 0; + } + + intel_iommu_init_qi(iommu); + iommu_flush_write_buffer(iommu); + ret = dmar_set_interrupt(iommu); + if (ret) + goto disable_iommu; + + iommu_set_root_entry(iommu); + iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL); + iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); + iommu_enable_translation(iommu); + + if (si_domain) { + ret = iommu_attach_domain(si_domain, iommu); + if (ret < 0 || si_domain->id != ret) + goto disable_iommu; + domain_attach_iommu(si_domain, iommu); + } + + iommu_disable_protect_mem_regions(iommu); + return 0; + +disable_iommu: + disable_dmar_iommu(iommu); +out: + free_dmar_iommu(iommu); + return ret; +} + +int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert) +{ + int ret = 0; + struct intel_iommu *iommu = dmaru->iommu; + + if (!intel_iommu_enabled) + return 0; + if (iommu == NULL) + return -EINVAL; + + if (insert) { + ret = intel_iommu_add(dmaru); + } else { + disable_dmar_iommu(iommu); + free_dmar_iommu(iommu); + } + + return ret; +} + +static void intel_iommu_free_dmars(void) +{ + struct dmar_rmrr_unit *rmrru, *rmrr_n; + struct dmar_atsr_unit *atsru, *atsr_n; + + list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) { + list_del(&rmrru->list); + dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt); + kfree(rmrru); + } + + list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) { + list_del(&atsru->list); + intel_iommu_free_atsr(atsru); + } +} + +int dmar_find_matched_atsr_unit(struct pci_dev *dev) +{ + int i, ret = 1; + struct pci_bus *bus; + struct pci_dev *bridge = NULL; + struct device *tmp; + struct acpi_dmar_atsr *atsr; + struct dmar_atsr_unit *atsru; + + dev = pci_physfn(dev); + for (bus = dev->bus; bus; bus = bus->parent) { + bridge = bus->self; + if (!bridge || !pci_is_pcie(bridge) || + pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE) + return 0; + if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT) + break; + } + if (!bridge) + return 0; + + rcu_read_lock(); + list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) { + atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header); + if (atsr->segment != pci_domain_nr(dev->bus)) + continue; + + for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp) + if (tmp == &bridge->dev) + goto out; + + if (atsru->include_all) + goto out; + } + ret = 0; +out: + rcu_read_unlock(); + + return ret; +} + +int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info) +{ + int ret = 0; + struct dmar_rmrr_unit *rmrru; + struct dmar_atsr_unit *atsru; + struct acpi_dmar_atsr *atsr; + struct acpi_dmar_reserved_memory *rmrr; + + if (!intel_iommu_enabled && system_state != SYSTEM_BOOTING) + return 0; + + list_for_each_entry(rmrru, &dmar_rmrr_units, list) { + rmrr = container_of(rmrru->hdr, + struct acpi_dmar_reserved_memory, header); + if (info->event == BUS_NOTIFY_ADD_DEVICE) { + ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1), + ((void *)rmrr) + rmrr->header.length, + rmrr->segment, rmrru->devices, + rmrru->devices_cnt); + if(ret < 0) + return ret; + } else if (info->event == BUS_NOTIFY_DEL_DEVICE) { + dmar_remove_dev_scope(info, rmrr->segment, + rmrru->devices, rmrru->devices_cnt); + } + } + + list_for_each_entry(atsru, &dmar_atsr_units, list) { + if (atsru->include_all) + continue; + + atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header); + if (info->event == BUS_NOTIFY_ADD_DEVICE) { + ret = dmar_insert_dev_scope(info, (void *)(atsr + 1), + (void *)atsr + atsr->header.length, + atsr->segment, atsru->devices, + atsru->devices_cnt); + if (ret > 0) + break; + else if(ret < 0) + return ret; + } else if (info->event == BUS_NOTIFY_DEL_DEVICE) { + if (dmar_remove_dev_scope(info, atsr->segment, + atsru->devices, atsru->devices_cnt)) + break; + } + } + + return 0; +} + +/* + * Here we only respond to action of unbound device from driver. + * + * Added device is not attached to its DMAR domain here yet. That will happen + * when mapping the device to iova. + */ +static int device_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct device *dev = data; + struct dmar_domain *domain; + + if (iommu_dummy(dev)) + return 0; + + if (action != BUS_NOTIFY_REMOVED_DEVICE) + return 0; + + domain = find_domain(dev); + if (!domain) + return 0; + + down_read(&dmar_global_lock); + domain_remove_one_dev_info(domain, dev); + if (!domain_type_is_vm_or_si(domain) && list_empty(&domain->devices)) + domain_exit(domain); + up_read(&dmar_global_lock); + + return 0; +} + +static struct notifier_block device_nb = { + .notifier_call = device_notifier, +}; + +static int intel_iommu_memory_notifier(struct notifier_block *nb, + unsigned long val, void *v) +{ + struct memory_notify *mhp = v; + unsigned long long start, end; + unsigned long start_vpfn, last_vpfn; + + switch (val) { + case MEM_GOING_ONLINE: + start = mhp->start_pfn << PAGE_SHIFT; + end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1; + if (iommu_domain_identity_map(si_domain, start, end)) { + pr_warn("dmar: failed to build identity map for [%llx-%llx]\n", + start, end); + return NOTIFY_BAD; + } + break; + + case MEM_OFFLINE: + case MEM_CANCEL_ONLINE: + start_vpfn = mm_to_dma_pfn(mhp->start_pfn); + last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1); + while (start_vpfn <= last_vpfn) { + struct iova *iova; + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + struct page *freelist; + + iova = find_iova(&si_domain->iovad, start_vpfn); + if (iova == NULL) { + pr_debug("dmar: failed get IOVA for PFN %lx\n", + start_vpfn); + break; + } + + iova = split_and_remove_iova(&si_domain->iovad, iova, + start_vpfn, last_vpfn); + if (iova == NULL) { + pr_warn("dmar: failed to split IOVA PFN [%lx-%lx]\n", + start_vpfn, last_vpfn); + return NOTIFY_BAD; + } + + freelist = domain_unmap(si_domain, iova->pfn_lo, + iova->pfn_hi); + + rcu_read_lock(); + for_each_active_iommu(iommu, drhd) + iommu_flush_iotlb_psi(iommu, si_domain->id, + iova->pfn_lo, iova_size(iova), + !freelist, 0); + rcu_read_unlock(); + dma_free_pagelist(freelist); + + start_vpfn = iova->pfn_hi + 1; + free_iova_mem(iova); + } + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block intel_iommu_memory_nb = { + .notifier_call = intel_iommu_memory_notifier, + .priority = 0 +}; + + +static ssize_t intel_iommu_show_version(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct intel_iommu *iommu = dev_get_drvdata(dev); + u32 ver = readl(iommu->reg + DMAR_VER_REG); + return sprintf(buf, "%d:%d\n", + DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver)); +} +static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL); + +static ssize_t intel_iommu_show_address(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct intel_iommu *iommu = dev_get_drvdata(dev); + return sprintf(buf, "%llx\n", iommu->reg_phys); +} +static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL); + +static ssize_t intel_iommu_show_cap(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct intel_iommu *iommu = dev_get_drvdata(dev); + return sprintf(buf, "%llx\n", iommu->cap); +} +static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL); + +static ssize_t intel_iommu_show_ecap(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct intel_iommu *iommu = dev_get_drvdata(dev); + return sprintf(buf, "%llx\n", iommu->ecap); +} +static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL); + +static struct attribute *intel_iommu_attrs[] = { + &dev_attr_version.attr, + &dev_attr_address.attr, + &dev_attr_cap.attr, + &dev_attr_ecap.attr, + NULL, +}; + +static struct attribute_group intel_iommu_group = { + .name = "intel-iommu", + .attrs = intel_iommu_attrs, +}; + +const struct attribute_group *intel_iommu_groups[] = { + &intel_iommu_group, + NULL, +}; + +int __init intel_iommu_init(void) +{ + int ret = -ENODEV; + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + + /* VT-d is required for a TXT/tboot launch, so enforce that */ + force_on = tboot_force_iommu(); + + if (iommu_init_mempool()) { + if (force_on) + panic("tboot: Failed to initialize iommu memory\n"); + return -ENOMEM; + } + + down_write(&dmar_global_lock); + if (dmar_table_init()) { + if (force_on) + panic("tboot: Failed to initialize DMAR table\n"); + goto out_free_dmar; + } + + /* + * Disable translation if already enabled prior to OS handover. + */ + for_each_active_iommu(iommu, drhd) + if (iommu->gcmd & DMA_GCMD_TE) + iommu_disable_translation(iommu); + + if (dmar_dev_scope_init() < 0) { + if (force_on) + panic("tboot: Failed to initialize DMAR device scope\n"); + goto out_free_dmar; + } + + if (no_iommu || dmar_disabled) + goto out_free_dmar; + + if (list_empty(&dmar_rmrr_units)) + printk(KERN_INFO "DMAR: No RMRR found\n"); + + if (list_empty(&dmar_atsr_units)) + printk(KERN_INFO "DMAR: No ATSR found\n"); + + if (dmar_init_reserved_ranges()) { + if (force_on) + panic("tboot: Failed to reserve iommu ranges\n"); + goto out_free_reserved_range; + } + + init_no_remapping_devices(); + + ret = init_dmars(); + if (ret) { + if (force_on) + panic("tboot: Failed to initialize DMARs\n"); + printk(KERN_ERR "IOMMU: dmar init failed\n"); + goto out_free_reserved_range; + } + up_write(&dmar_global_lock); + printk(KERN_INFO + "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n"); + + init_timer(&unmap_timer); +#ifdef CONFIG_SWIOTLB + swiotlb = 0; +#endif + dma_ops = &intel_dma_ops; + + init_iommu_pm_ops(); + + for_each_active_iommu(iommu, drhd) + iommu->iommu_dev = iommu_device_create(NULL, iommu, + intel_iommu_groups, + iommu->name); + + bus_set_iommu(&pci_bus_type, &intel_iommu_ops); + bus_register_notifier(&pci_bus_type, &device_nb); + if (si_domain && !hw_pass_through) + register_memory_notifier(&intel_iommu_memory_nb); + + intel_iommu_enabled = 1; + + return 0; + +out_free_reserved_range: + put_iova_domain(&reserved_iova_list); +out_free_dmar: + intel_iommu_free_dmars(); + up_write(&dmar_global_lock); + iommu_exit_mempool(); + return ret; +} + +static int iommu_detach_dev_cb(struct pci_dev *pdev, u16 alias, void *opaque) +{ + struct intel_iommu *iommu = opaque; + + iommu_detach_dev(iommu, PCI_BUS_NUM(alias), alias & 0xff); + return 0; +} + +/* + * NB - intel-iommu lacks any sort of reference counting for the users of + * dependent devices. If multiple endpoints have intersecting dependent + * devices, unbinding the driver from any one of them will possibly leave + * the others unable to operate. + */ +static void iommu_detach_dependent_devices(struct intel_iommu *iommu, + struct device *dev) +{ + if (!iommu || !dev || !dev_is_pci(dev)) + return; + + pci_for_each_dma_alias(to_pci_dev(dev), &iommu_detach_dev_cb, iommu); +} + +static void domain_remove_one_dev_info(struct dmar_domain *domain, + struct device *dev) +{ + struct device_domain_info *info, *tmp; + struct intel_iommu *iommu; + unsigned long flags; + bool found = false; + u8 bus, devfn; + + iommu = device_to_iommu(dev, &bus, &devfn); + if (!iommu) + return; + + spin_lock_irqsave(&device_domain_lock, flags); + list_for_each_entry_safe(info, tmp, &domain->devices, link) { + if (info->iommu == iommu && info->bus == bus && + info->devfn == devfn) { + unlink_domain_info(info); + spin_unlock_irqrestore(&device_domain_lock, flags); + + iommu_disable_dev_iotlb(info); + iommu_detach_dev(iommu, info->bus, info->devfn); + iommu_detach_dependent_devices(iommu, dev); + free_devinfo_mem(info); + + spin_lock_irqsave(&device_domain_lock, flags); + + if (found) + break; + else + continue; + } + + /* if there is no other devices under the same iommu + * owned by this domain, clear this iommu in iommu_bmp + * update iommu count and coherency + */ + if (info->iommu == iommu) + found = true; + } + + spin_unlock_irqrestore(&device_domain_lock, flags); + + if (found == 0) { + domain_detach_iommu(domain, iommu); + if (!domain_type_is_vm_or_si(domain)) + iommu_detach_domain(domain, iommu); + } +} + +static int md_domain_init(struct dmar_domain *domain, int guest_width) +{ + int adjust_width; + + init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN, + DMA_32BIT_PFN); + domain_reserve_special_ranges(domain); + + /* calculate AGAW */ + domain->gaw = guest_width; + adjust_width = guestwidth_to_adjustwidth(guest_width); + domain->agaw = width_to_agaw(adjust_width); + + domain->iommu_coherency = 0; + domain->iommu_snooping = 0; + domain->iommu_superpage = 0; + domain->max_addr = 0; + + /* always allocate the top pgd */ + domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid); + if (!domain->pgd) + return -ENOMEM; + domain_flush_cache(domain, domain->pgd, PAGE_SIZE); + return 0; +} + +static struct iommu_domain *intel_iommu_domain_alloc(unsigned type) +{ + struct dmar_domain *dmar_domain; + struct iommu_domain *domain; + + if (type != IOMMU_DOMAIN_UNMANAGED) + return NULL; + + dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE); + if (!dmar_domain) { + printk(KERN_ERR + "intel_iommu_domain_init: dmar_domain == NULL\n"); + return NULL; + } + if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { + printk(KERN_ERR + "intel_iommu_domain_init() failed\n"); + domain_exit(dmar_domain); + return NULL; + } + domain_update_iommu_cap(dmar_domain); + + domain = &dmar_domain->domain; + domain->geometry.aperture_start = 0; + domain->geometry.aperture_end = __DOMAIN_MAX_ADDR(dmar_domain->gaw); + domain->geometry.force_aperture = true; + + return domain; +} + +static void intel_iommu_domain_free(struct iommu_domain *domain) +{ + domain_exit(to_dmar_domain(domain)); +} + +static int intel_iommu_attach_device(struct iommu_domain *domain, + struct device *dev) +{ + struct dmar_domain *dmar_domain = to_dmar_domain(domain); + struct intel_iommu *iommu; + int addr_width; + u8 bus, devfn; + + if (device_is_rmrr_locked(dev)) { + dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n"); + return -EPERM; + } + + /* normally dev is not mapped */ + if (unlikely(domain_context_mapped(dev))) { + struct dmar_domain *old_domain; + + old_domain = find_domain(dev); + if (old_domain) { + if (domain_type_is_vm_or_si(dmar_domain)) + domain_remove_one_dev_info(old_domain, dev); + else + domain_remove_dev_info(old_domain); + + if (!domain_type_is_vm_or_si(old_domain) && + list_empty(&old_domain->devices)) + domain_exit(old_domain); + } + } + + iommu = device_to_iommu(dev, &bus, &devfn); + if (!iommu) + return -ENODEV; + + /* check if this iommu agaw is sufficient for max mapped address */ + addr_width = agaw_to_width(iommu->agaw); + if (addr_width > cap_mgaw(iommu->cap)) + addr_width = cap_mgaw(iommu->cap); + + if (dmar_domain->max_addr > (1LL << addr_width)) { + printk(KERN_ERR "%s: iommu width (%d) is not " + "sufficient for the mapped address (%llx)\n", + __func__, addr_width, dmar_domain->max_addr); + return -EFAULT; + } + dmar_domain->gaw = addr_width; + + /* + * Knock out extra levels of page tables if necessary + */ + while (iommu->agaw < dmar_domain->agaw) { + struct dma_pte *pte; + + pte = dmar_domain->pgd; + if (dma_pte_present(pte)) { + dmar_domain->pgd = (struct dma_pte *) + phys_to_virt(dma_pte_addr(pte)); + free_pgtable_page(pte); + } + dmar_domain->agaw--; + } + + return domain_add_dev_info(dmar_domain, dev, CONTEXT_TT_MULTI_LEVEL); +} + +static void intel_iommu_detach_device(struct iommu_domain *domain, + struct device *dev) +{ + domain_remove_one_dev_info(to_dmar_domain(domain), dev); +} + +static int intel_iommu_map(struct iommu_domain *domain, + unsigned long iova, phys_addr_t hpa, + size_t size, int iommu_prot) +{ + struct dmar_domain *dmar_domain = to_dmar_domain(domain); + u64 max_addr; + int prot = 0; + int ret; + + if (iommu_prot & IOMMU_READ) + prot |= DMA_PTE_READ; + if (iommu_prot & IOMMU_WRITE) + prot |= DMA_PTE_WRITE; + if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping) + prot |= DMA_PTE_SNP; + + max_addr = iova + size; + if (dmar_domain->max_addr < max_addr) { + u64 end; + + /* check if minimum agaw is sufficient for mapped address */ + end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1; + if (end < max_addr) { + printk(KERN_ERR "%s: iommu width (%d) is not " + "sufficient for the mapped address (%llx)\n", + __func__, dmar_domain->gaw, max_addr); + return -EFAULT; + } + dmar_domain->max_addr = max_addr; + } + /* Round up size to next multiple of PAGE_SIZE, if it and + the low bits of hpa would take us onto the next page */ + size = aligned_nrpages(hpa, size); + ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT, + hpa >> VTD_PAGE_SHIFT, size, prot); + return ret; +} + +static size_t intel_iommu_unmap(struct iommu_domain *domain, + unsigned long iova, size_t size) +{ + struct dmar_domain *dmar_domain = to_dmar_domain(domain); + struct page *freelist = NULL; + struct intel_iommu *iommu; + unsigned long start_pfn, last_pfn; + unsigned int npages; + int iommu_id, num, ndomains, level = 0; + + /* Cope with horrid API which requires us to unmap more than the + size argument if it happens to be a large-page mapping. */ + if (!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level)) + BUG(); + + if (size < VTD_PAGE_SIZE << level_to_offset_bits(level)) + size = VTD_PAGE_SIZE << level_to_offset_bits(level); + + start_pfn = iova >> VTD_PAGE_SHIFT; + last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT; + + freelist = domain_unmap(dmar_domain, start_pfn, last_pfn); + + npages = last_pfn - start_pfn + 1; + + for_each_set_bit(iommu_id, dmar_domain->iommu_bmp, g_num_of_iommus) { + iommu = g_iommus[iommu_id]; + + /* + * find bit position of dmar_domain + */ + ndomains = cap_ndoms(iommu->cap); + for_each_set_bit(num, iommu->domain_ids, ndomains) { + if (iommu->domains[num] == dmar_domain) + iommu_flush_iotlb_psi(iommu, num, start_pfn, + npages, !freelist, 0); + } + + } + + dma_free_pagelist(freelist); + + if (dmar_domain->max_addr == iova + size) + dmar_domain->max_addr = iova; + + return size; +} + +static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, + dma_addr_t iova) +{ + struct dmar_domain *dmar_domain = to_dmar_domain(domain); + struct dma_pte *pte; + int level = 0; + u64 phys = 0; + + pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level); + if (pte) + phys = dma_pte_addr(pte); + + return phys; +} + +static bool intel_iommu_capable(enum iommu_cap cap) +{ + if (cap == IOMMU_CAP_CACHE_COHERENCY) + return domain_update_iommu_snooping(NULL) == 1; + if (cap == IOMMU_CAP_INTR_REMAP) + return irq_remapping_enabled == 1; + + return false; +} + +static int intel_iommu_add_device(struct device *dev) +{ + struct intel_iommu *iommu; + struct iommu_group *group; + u8 bus, devfn; + + iommu = device_to_iommu(dev, &bus, &devfn); + if (!iommu) + return -ENODEV; + + iommu_device_link(iommu->iommu_dev, dev); + + group = iommu_group_get_for_dev(dev); + + if (IS_ERR(group)) + return PTR_ERR(group); + + iommu_group_put(group); + return 0; +} + +static void intel_iommu_remove_device(struct device *dev) +{ + struct intel_iommu *iommu; + u8 bus, devfn; + + iommu = device_to_iommu(dev, &bus, &devfn); + if (!iommu) + return; + + iommu_group_remove_device(dev); + + iommu_device_unlink(iommu->iommu_dev, dev); +} + +static const struct iommu_ops intel_iommu_ops = { + .capable = intel_iommu_capable, + .domain_alloc = intel_iommu_domain_alloc, + .domain_free = intel_iommu_domain_free, + .attach_dev = intel_iommu_attach_device, + .detach_dev = intel_iommu_detach_device, + .map = intel_iommu_map, + .unmap = intel_iommu_unmap, + .map_sg = default_iommu_map_sg, + .iova_to_phys = intel_iommu_iova_to_phys, + .add_device = intel_iommu_add_device, + .remove_device = intel_iommu_remove_device, + .pgsize_bitmap = INTEL_IOMMU_PGSIZES, +}; + +static void quirk_iommu_g4x_gfx(struct pci_dev *dev) +{ + /* G4x/GM45 integrated gfx dmar support is totally busted. */ + printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n"); + dmar_map_gfx = 0; +} + +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx); + +static void quirk_iommu_rwbf(struct pci_dev *dev) +{ + /* + * Mobile 4 Series Chipset neglects to set RWBF capability, + * but needs it. Same seems to hold for the desktop versions. + */ + printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n"); + rwbf_quirk = 1; +} + +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf); + +#define GGC 0x52 +#define GGC_MEMORY_SIZE_MASK (0xf << 8) +#define GGC_MEMORY_SIZE_NONE (0x0 << 8) +#define GGC_MEMORY_SIZE_1M (0x1 << 8) +#define GGC_MEMORY_SIZE_2M (0x3 << 8) +#define GGC_MEMORY_VT_ENABLED (0x8 << 8) +#define GGC_MEMORY_SIZE_2M_VT (0x9 << 8) +#define GGC_MEMORY_SIZE_3M_VT (0xa << 8) +#define GGC_MEMORY_SIZE_4M_VT (0xb << 8) + +static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev) +{ + unsigned short ggc; + + if (pci_read_config_word(dev, GGC, &ggc)) + return; + + if (!(ggc & GGC_MEMORY_VT_ENABLED)) { + printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n"); + dmar_map_gfx = 0; + } else if (dmar_map_gfx) { + /* we have to ensure the gfx device is idle before we flush */ + printk(KERN_INFO "DMAR: Disabling batched IOTLB flush on Ironlake\n"); + intel_iommu_strict = 1; + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt); + +/* On Tylersburg chipsets, some BIOSes have been known to enable the + ISOCH DMAR unit for the Azalia sound device, but not give it any + TLB entries, which causes it to deadlock. Check for that. We do + this in a function called from init_dmars(), instead of in a PCI + quirk, because we don't want to print the obnoxious "BIOS broken" + message if VT-d is actually disabled. +*/ +static void __init check_tylersburg_isoch(void) +{ + struct pci_dev *pdev; + uint32_t vtisochctrl; + + /* If there's no Azalia in the system anyway, forget it. */ + pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL); + if (!pdev) + return; + pci_dev_put(pdev); + + /* System Management Registers. Might be hidden, in which case + we can't do the sanity check. But that's OK, because the + known-broken BIOSes _don't_ actually hide it, so far. */ + pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL); + if (!pdev) + return; + + if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) { + pci_dev_put(pdev); + return; + } + + pci_dev_put(pdev); + + /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */ + if (vtisochctrl & 1) + return; + + /* Drop all bits other than the number of TLB entries */ + vtisochctrl &= 0x1c; + + /* If we have the recommended number of TLB entries (16), fine. */ + if (vtisochctrl == 0x10) + return; + + /* Zero TLB entries? You get to ride the short bus to school. */ + if (!vtisochctrl) { + WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n" + "BIOS vendor: %s; Ver: %s; Product Version: %s\n", + dmi_get_system_info(DMI_BIOS_VENDOR), + dmi_get_system_info(DMI_BIOS_VERSION), + dmi_get_system_info(DMI_PRODUCT_VERSION)); + iommu_identity_mapping |= IDENTMAP_AZALIA; + return; + } + + printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n", + vtisochctrl); +} diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c new file mode 100644 index 000000000..5709ae9c3 --- /dev/null +++ b/drivers/iommu/intel_irq_remapping.c @@ -0,0 +1,1301 @@ +#include <linux/interrupt.h> +#include <linux/dmar.h> +#include <linux/spinlock.h> +#include <linux/slab.h> +#include <linux/jiffies.h> +#include <linux/hpet.h> +#include <linux/pci.h> +#include <linux/irq.h> +#include <linux/intel-iommu.h> +#include <linux/acpi.h> +#include <asm/io_apic.h> +#include <asm/smp.h> +#include <asm/cpu.h> +#include <asm/irq_remapping.h> +#include <asm/pci-direct.h> +#include <asm/msidef.h> + +#include "irq_remapping.h" + +struct ioapic_scope { + struct intel_iommu *iommu; + unsigned int id; + unsigned int bus; /* PCI bus number */ + unsigned int devfn; /* PCI devfn number */ +}; + +struct hpet_scope { + struct intel_iommu *iommu; + u8 id; + unsigned int bus; + unsigned int devfn; +}; + +#define IR_X2APIC_MODE(mode) (mode ? (1 << 11) : 0) +#define IRTE_DEST(dest) ((eim_mode) ? dest : dest << 8) + +static int __read_mostly eim_mode; +static struct ioapic_scope ir_ioapic[MAX_IO_APICS]; +static struct hpet_scope ir_hpet[MAX_HPET_TBS]; + +/* + * Lock ordering: + * ->dmar_global_lock + * ->irq_2_ir_lock + * ->qi->q_lock + * ->iommu->register_lock + * Note: + * intel_irq_remap_ops.{supported,prepare,enable,disable,reenable} are called + * in single-threaded environment with interrupt disabled, so no need to tabke + * the dmar_global_lock. + */ +static DEFINE_RAW_SPINLOCK(irq_2_ir_lock); + +static int __init parse_ioapics_under_ir(void); + +static struct irq_2_iommu *irq_2_iommu(unsigned int irq) +{ + struct irq_cfg *cfg = irq_cfg(irq); + return cfg ? &cfg->irq_2_iommu : NULL; +} + +static int get_irte(int irq, struct irte *entry) +{ + struct irq_2_iommu *irq_iommu = irq_2_iommu(irq); + unsigned long flags; + int index; + + if (!entry || !irq_iommu) + return -1; + + raw_spin_lock_irqsave(&irq_2_ir_lock, flags); + + if (unlikely(!irq_iommu->iommu)) { + raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags); + return -1; + } + + index = irq_iommu->irte_index + irq_iommu->sub_handle; + *entry = *(irq_iommu->iommu->ir_table->base + index); + + raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags); + return 0; +} + +static int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) +{ + struct ir_table *table = iommu->ir_table; + struct irq_2_iommu *irq_iommu = irq_2_iommu(irq); + struct irq_cfg *cfg = irq_cfg(irq); + unsigned int mask = 0; + unsigned long flags; + int index; + + if (!count || !irq_iommu) + return -1; + + if (count > 1) { + count = __roundup_pow_of_two(count); + mask = ilog2(count); + } + + if (mask > ecap_max_handle_mask(iommu->ecap)) { + printk(KERN_ERR + "Requested mask %x exceeds the max invalidation handle" + " mask value %Lx\n", mask, + ecap_max_handle_mask(iommu->ecap)); + return -1; + } + + raw_spin_lock_irqsave(&irq_2_ir_lock, flags); + index = bitmap_find_free_region(table->bitmap, + INTR_REMAP_TABLE_ENTRIES, mask); + if (index < 0) { + pr_warn("IR%d: can't allocate an IRTE\n", iommu->seq_id); + } else { + cfg->remapped = 1; + irq_iommu->iommu = iommu; + irq_iommu->irte_index = index; + irq_iommu->sub_handle = 0; + irq_iommu->irte_mask = mask; + } + raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags); + + return index; +} + +static int qi_flush_iec(struct intel_iommu *iommu, int index, int mask) +{ + struct qi_desc desc; + + desc.low = QI_IEC_IIDEX(index) | QI_IEC_TYPE | QI_IEC_IM(mask) + | QI_IEC_SELECTIVE; + desc.high = 0; + + return qi_submit_sync(&desc, iommu); +} + +static int map_irq_to_irte_handle(int irq, u16 *sub_handle) +{ + struct irq_2_iommu *irq_iommu = irq_2_iommu(irq); + unsigned long flags; + int index; + + if (!irq_iommu) + return -1; + + raw_spin_lock_irqsave(&irq_2_ir_lock, flags); + *sub_handle = irq_iommu->sub_handle; + index = irq_iommu->irte_index; + raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags); + return index; +} + +static int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle) +{ + struct irq_2_iommu *irq_iommu = irq_2_iommu(irq); + struct irq_cfg *cfg = irq_cfg(irq); + unsigned long flags; + + if (!irq_iommu) + return -1; + + raw_spin_lock_irqsave(&irq_2_ir_lock, flags); + + cfg->remapped = 1; + irq_iommu->iommu = iommu; + irq_iommu->irte_index = index; + irq_iommu->sub_handle = subhandle; + irq_iommu->irte_mask = 0; + + raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags); + + return 0; +} + +static int modify_irte(int irq, struct irte *irte_modified) +{ + struct irq_2_iommu *irq_iommu = irq_2_iommu(irq); + struct intel_iommu *iommu; + unsigned long flags; + struct irte *irte; + int rc, index; + + if (!irq_iommu) + return -1; + + raw_spin_lock_irqsave(&irq_2_ir_lock, flags); + + iommu = irq_iommu->iommu; + + index = irq_iommu->irte_index + irq_iommu->sub_handle; + irte = &iommu->ir_table->base[index]; + + set_64bit(&irte->low, irte_modified->low); + set_64bit(&irte->high, irte_modified->high); + __iommu_flush_cache(iommu, irte, sizeof(*irte)); + + rc = qi_flush_iec(iommu, index, 0); + raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags); + + return rc; +} + +static struct intel_iommu *map_hpet_to_ir(u8 hpet_id) +{ + int i; + + for (i = 0; i < MAX_HPET_TBS; i++) + if (ir_hpet[i].id == hpet_id && ir_hpet[i].iommu) + return ir_hpet[i].iommu; + return NULL; +} + +static struct intel_iommu *map_ioapic_to_ir(int apic) +{ + int i; + + for (i = 0; i < MAX_IO_APICS; i++) + if (ir_ioapic[i].id == apic && ir_ioapic[i].iommu) + return ir_ioapic[i].iommu; + return NULL; +} + +static struct intel_iommu *map_dev_to_ir(struct pci_dev *dev) +{ + struct dmar_drhd_unit *drhd; + + drhd = dmar_find_matched_drhd_unit(dev); + if (!drhd) + return NULL; + + return drhd->iommu; +} + +static int clear_entries(struct irq_2_iommu *irq_iommu) +{ + struct irte *start, *entry, *end; + struct intel_iommu *iommu; + int index; + + if (irq_iommu->sub_handle) + return 0; + + iommu = irq_iommu->iommu; + index = irq_iommu->irte_index + irq_iommu->sub_handle; + + start = iommu->ir_table->base + index; + end = start + (1 << irq_iommu->irte_mask); + + for (entry = start; entry < end; entry++) { + set_64bit(&entry->low, 0); + set_64bit(&entry->high, 0); + } + bitmap_release_region(iommu->ir_table->bitmap, index, + irq_iommu->irte_mask); + + return qi_flush_iec(iommu, index, irq_iommu->irte_mask); +} + +static int free_irte(int irq) +{ + struct irq_2_iommu *irq_iommu = irq_2_iommu(irq); + unsigned long flags; + int rc; + + if (!irq_iommu) + return -1; + + raw_spin_lock_irqsave(&irq_2_ir_lock, flags); + + rc = clear_entries(irq_iommu); + + irq_iommu->iommu = NULL; + irq_iommu->irte_index = 0; + irq_iommu->sub_handle = 0; + irq_iommu->irte_mask = 0; + + raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags); + + return rc; +} + +/* + * source validation type + */ +#define SVT_NO_VERIFY 0x0 /* no verification is required */ +#define SVT_VERIFY_SID_SQ 0x1 /* verify using SID and SQ fields */ +#define SVT_VERIFY_BUS 0x2 /* verify bus of request-id */ + +/* + * source-id qualifier + */ +#define SQ_ALL_16 0x0 /* verify all 16 bits of request-id */ +#define SQ_13_IGNORE_1 0x1 /* verify most significant 13 bits, ignore + * the third least significant bit + */ +#define SQ_13_IGNORE_2 0x2 /* verify most significant 13 bits, ignore + * the second and third least significant bits + */ +#define SQ_13_IGNORE_3 0x3 /* verify most significant 13 bits, ignore + * the least three significant bits + */ + +/* + * set SVT, SQ and SID fields of irte to verify + * source ids of interrupt requests + */ +static void set_irte_sid(struct irte *irte, unsigned int svt, + unsigned int sq, unsigned int sid) +{ + if (disable_sourceid_checking) + svt = SVT_NO_VERIFY; + irte->svt = svt; + irte->sq = sq; + irte->sid = sid; +} + +static int set_ioapic_sid(struct irte *irte, int apic) +{ + int i; + u16 sid = 0; + + if (!irte) + return -1; + + down_read(&dmar_global_lock); + for (i = 0; i < MAX_IO_APICS; i++) { + if (ir_ioapic[i].iommu && ir_ioapic[i].id == apic) { + sid = (ir_ioapic[i].bus << 8) | ir_ioapic[i].devfn; + break; + } + } + up_read(&dmar_global_lock); + + if (sid == 0) { + pr_warning("Failed to set source-id of IOAPIC (%d)\n", apic); + return -1; + } + + set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16, sid); + + return 0; +} + +static int set_hpet_sid(struct irte *irte, u8 id) +{ + int i; + u16 sid = 0; + + if (!irte) + return -1; + + down_read(&dmar_global_lock); + for (i = 0; i < MAX_HPET_TBS; i++) { + if (ir_hpet[i].iommu && ir_hpet[i].id == id) { + sid = (ir_hpet[i].bus << 8) | ir_hpet[i].devfn; + break; + } + } + up_read(&dmar_global_lock); + + if (sid == 0) { + pr_warning("Failed to set source-id of HPET block (%d)\n", id); + return -1; + } + + /* + * Should really use SQ_ALL_16. Some platforms are broken. + * While we figure out the right quirks for these broken platforms, use + * SQ_13_IGNORE_3 for now. + */ + set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_13_IGNORE_3, sid); + + return 0; +} + +struct set_msi_sid_data { + struct pci_dev *pdev; + u16 alias; +}; + +static int set_msi_sid_cb(struct pci_dev *pdev, u16 alias, void *opaque) +{ + struct set_msi_sid_data *data = opaque; + + data->pdev = pdev; + data->alias = alias; + + return 0; +} + +static int set_msi_sid(struct irte *irte, struct pci_dev *dev) +{ + struct set_msi_sid_data data; + + if (!irte || !dev) + return -1; + + pci_for_each_dma_alias(dev, set_msi_sid_cb, &data); + + /* + * DMA alias provides us with a PCI device and alias. The only case + * where the it will return an alias on a different bus than the + * device is the case of a PCIe-to-PCI bridge, where the alias is for + * the subordinate bus. In this case we can only verify the bus. + * + * If the alias device is on a different bus than our source device + * then we have a topology based alias, use it. + * + * Otherwise, the alias is for a device DMA quirk and we cannot + * assume that MSI uses the same requester ID. Therefore use the + * original device. + */ + if (PCI_BUS_NUM(data.alias) != data.pdev->bus->number) + set_irte_sid(irte, SVT_VERIFY_BUS, SQ_ALL_16, + PCI_DEVID(PCI_BUS_NUM(data.alias), + dev->bus->number)); + else if (data.pdev->bus->number != dev->bus->number) + set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16, data.alias); + else + set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16, + PCI_DEVID(dev->bus->number, dev->devfn)); + + return 0; +} + +static void iommu_set_irq_remapping(struct intel_iommu *iommu, int mode) +{ + u64 addr; + u32 sts; + unsigned long flags; + + addr = virt_to_phys((void *)iommu->ir_table->base); + + raw_spin_lock_irqsave(&iommu->register_lock, flags); + + dmar_writeq(iommu->reg + DMAR_IRTA_REG, + (addr) | IR_X2APIC_MODE(mode) | INTR_REMAP_TABLE_REG_SIZE); + + /* Set interrupt-remapping table pointer */ + writel(iommu->gcmd | DMA_GCMD_SIRTP, iommu->reg + DMAR_GCMD_REG); + + IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, + readl, (sts & DMA_GSTS_IRTPS), sts); + raw_spin_unlock_irqrestore(&iommu->register_lock, flags); + + /* + * global invalidation of interrupt entry cache before enabling + * interrupt-remapping. + */ + qi_global_iec(iommu); + + raw_spin_lock_irqsave(&iommu->register_lock, flags); + + /* Enable interrupt-remapping */ + iommu->gcmd |= DMA_GCMD_IRE; + iommu->gcmd &= ~DMA_GCMD_CFI; /* Block compatibility-format MSIs */ + writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); + + IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, + readl, (sts & DMA_GSTS_IRES), sts); + + /* + * With CFI clear in the Global Command register, we should be + * protected from dangerous (i.e. compatibility) interrupts + * regardless of x2apic status. Check just to be sure. + */ + if (sts & DMA_GSTS_CFIS) + WARN(1, KERN_WARNING + "Compatibility-format IRQs enabled despite intr remapping;\n" + "you are vulnerable to IRQ injection.\n"); + + raw_spin_unlock_irqrestore(&iommu->register_lock, flags); +} + +static int intel_setup_irq_remapping(struct intel_iommu *iommu) +{ + struct ir_table *ir_table; + struct page *pages; + unsigned long *bitmap; + + if (iommu->ir_table) + return 0; + + ir_table = kzalloc(sizeof(struct ir_table), GFP_KERNEL); + if (!ir_table) + return -ENOMEM; + + pages = alloc_pages_node(iommu->node, GFP_KERNEL | __GFP_ZERO, + INTR_REMAP_PAGE_ORDER); + + if (!pages) { + pr_err("IR%d: failed to allocate pages of order %d\n", + iommu->seq_id, INTR_REMAP_PAGE_ORDER); + goto out_free_table; + } + + bitmap = kcalloc(BITS_TO_LONGS(INTR_REMAP_TABLE_ENTRIES), + sizeof(long), GFP_ATOMIC); + if (bitmap == NULL) { + pr_err("IR%d: failed to allocate bitmap\n", iommu->seq_id); + goto out_free_pages; + } + + ir_table->base = page_address(pages); + ir_table->bitmap = bitmap; + iommu->ir_table = ir_table; + return 0; + +out_free_pages: + __free_pages(pages, INTR_REMAP_PAGE_ORDER); +out_free_table: + kfree(ir_table); + return -ENOMEM; +} + +static void intel_teardown_irq_remapping(struct intel_iommu *iommu) +{ + if (iommu && iommu->ir_table) { + free_pages((unsigned long)iommu->ir_table->base, + INTR_REMAP_PAGE_ORDER); + kfree(iommu->ir_table->bitmap); + kfree(iommu->ir_table); + iommu->ir_table = NULL; + } +} + +/* + * Disable Interrupt Remapping. + */ +static void iommu_disable_irq_remapping(struct intel_iommu *iommu) +{ + unsigned long flags; + u32 sts; + + if (!ecap_ir_support(iommu->ecap)) + return; + + /* + * global invalidation of interrupt entry cache before disabling + * interrupt-remapping. + */ + qi_global_iec(iommu); + + raw_spin_lock_irqsave(&iommu->register_lock, flags); + + sts = dmar_readq(iommu->reg + DMAR_GSTS_REG); + if (!(sts & DMA_GSTS_IRES)) + goto end; + + iommu->gcmd &= ~DMA_GCMD_IRE; + writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); + + IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, + readl, !(sts & DMA_GSTS_IRES), sts); + +end: + raw_spin_unlock_irqrestore(&iommu->register_lock, flags); +} + +static int __init dmar_x2apic_optout(void) +{ + struct acpi_table_dmar *dmar; + dmar = (struct acpi_table_dmar *)dmar_tbl; + if (!dmar || no_x2apic_optout) + return 0; + return dmar->flags & DMAR_X2APIC_OPT_OUT; +} + +static void __init intel_cleanup_irq_remapping(void) +{ + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + + for_each_iommu(iommu, drhd) { + if (ecap_ir_support(iommu->ecap)) { + iommu_disable_irq_remapping(iommu); + intel_teardown_irq_remapping(iommu); + } + } + + if (x2apic_supported()) + pr_warn("Failed to enable irq remapping. You are vulnerable to irq-injection attacks.\n"); +} + +static int __init intel_prepare_irq_remapping(void) +{ + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + + if (irq_remap_broken) { + printk(KERN_WARNING + "This system BIOS has enabled interrupt remapping\n" + "on a chipset that contains an erratum making that\n" + "feature unstable. To maintain system stability\n" + "interrupt remapping is being disabled. Please\n" + "contact your BIOS vendor for an update\n"); + add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); + return -ENODEV; + } + + if (dmar_table_init() < 0) + return -ENODEV; + + if (!dmar_ir_support()) + return -ENODEV; + + if (parse_ioapics_under_ir() != 1) { + printk(KERN_INFO "Not enabling interrupt remapping\n"); + goto error; + } + + /* First make sure all IOMMUs support IRQ remapping */ + for_each_iommu(iommu, drhd) + if (!ecap_ir_support(iommu->ecap)) + goto error; + + /* Do the allocations early */ + for_each_iommu(iommu, drhd) + if (intel_setup_irq_remapping(iommu)) + goto error; + + return 0; + +error: + intel_cleanup_irq_remapping(); + return -ENODEV; +} + +static int __init intel_enable_irq_remapping(void) +{ + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + bool setup = false; + int eim = 0; + + if (x2apic_supported()) { + eim = !dmar_x2apic_optout(); + if (!eim) + pr_info("x2apic is disabled because BIOS sets x2apic opt out bit. You can use 'intremap=no_x2apic_optout' to override the BIOS setting.\n"); + } + + for_each_iommu(iommu, drhd) { + /* + * If the queued invalidation is already initialized, + * shouldn't disable it. + */ + if (iommu->qi) + continue; + + /* + * Clear previous faults. + */ + dmar_fault(-1, iommu); + + /* + * Disable intr remapping and queued invalidation, if already + * enabled prior to OS handover. + */ + iommu_disable_irq_remapping(iommu); + + dmar_disable_qi(iommu); + } + + /* + * check for the Interrupt-remapping support + */ + for_each_iommu(iommu, drhd) + if (eim && !ecap_eim_support(iommu->ecap)) { + printk(KERN_INFO "DRHD %Lx: EIM not supported by DRHD, " + " ecap %Lx\n", drhd->reg_base_addr, iommu->ecap); + eim = 0; + } + eim_mode = eim; + if (eim) + pr_info("Queued invalidation will be enabled to support x2apic and Intr-remapping.\n"); + + /* + * Enable queued invalidation for all the DRHD's. + */ + for_each_iommu(iommu, drhd) { + int ret = dmar_enable_qi(iommu); + + if (ret) { + printk(KERN_ERR "DRHD %Lx: failed to enable queued, " + " invalidation, ecap %Lx, ret %d\n", + drhd->reg_base_addr, iommu->ecap, ret); + goto error; + } + } + + /* + * Setup Interrupt-remapping for all the DRHD's now. + */ + for_each_iommu(iommu, drhd) { + iommu_set_irq_remapping(iommu, eim); + setup = true; + } + + if (!setup) + goto error; + + irq_remapping_enabled = 1; + + /* + * VT-d has a different layout for IO-APIC entries when + * interrupt remapping is enabled. So it needs a special routine + * to print IO-APIC entries for debugging purposes too. + */ + x86_io_apic_ops.print_entries = intel_ir_io_apic_print_entries; + + pr_info("Enabled IRQ remapping in %s mode\n", eim ? "x2apic" : "xapic"); + + return eim ? IRQ_REMAP_X2APIC_MODE : IRQ_REMAP_XAPIC_MODE; + +error: + intel_cleanup_irq_remapping(); + return -1; +} + +static int ir_parse_one_hpet_scope(struct acpi_dmar_device_scope *scope, + struct intel_iommu *iommu, + struct acpi_dmar_hardware_unit *drhd) +{ + struct acpi_dmar_pci_path *path; + u8 bus; + int count, free = -1; + + bus = scope->bus; + path = (struct acpi_dmar_pci_path *)(scope + 1); + count = (scope->length - sizeof(struct acpi_dmar_device_scope)) + / sizeof(struct acpi_dmar_pci_path); + + while (--count > 0) { + /* + * Access PCI directly due to the PCI + * subsystem isn't initialized yet. + */ + bus = read_pci_config_byte(bus, path->device, path->function, + PCI_SECONDARY_BUS); + path++; + } + + for (count = 0; count < MAX_HPET_TBS; count++) { + if (ir_hpet[count].iommu == iommu && + ir_hpet[count].id == scope->enumeration_id) + return 0; + else if (ir_hpet[count].iommu == NULL && free == -1) + free = count; + } + if (free == -1) { + pr_warn("Exceeded Max HPET blocks\n"); + return -ENOSPC; + } + + ir_hpet[free].iommu = iommu; + ir_hpet[free].id = scope->enumeration_id; + ir_hpet[free].bus = bus; + ir_hpet[free].devfn = PCI_DEVFN(path->device, path->function); + pr_info("HPET id %d under DRHD base 0x%Lx\n", + scope->enumeration_id, drhd->address); + + return 0; +} + +static int ir_parse_one_ioapic_scope(struct acpi_dmar_device_scope *scope, + struct intel_iommu *iommu, + struct acpi_dmar_hardware_unit *drhd) +{ + struct acpi_dmar_pci_path *path; + u8 bus; + int count, free = -1; + + bus = scope->bus; + path = (struct acpi_dmar_pci_path *)(scope + 1); + count = (scope->length - sizeof(struct acpi_dmar_device_scope)) + / sizeof(struct acpi_dmar_pci_path); + + while (--count > 0) { + /* + * Access PCI directly due to the PCI + * subsystem isn't initialized yet. + */ + bus = read_pci_config_byte(bus, path->device, path->function, + PCI_SECONDARY_BUS); + path++; + } + + for (count = 0; count < MAX_IO_APICS; count++) { + if (ir_ioapic[count].iommu == iommu && + ir_ioapic[count].id == scope->enumeration_id) + return 0; + else if (ir_ioapic[count].iommu == NULL && free == -1) + free = count; + } + if (free == -1) { + pr_warn("Exceeded Max IO APICS\n"); + return -ENOSPC; + } + + ir_ioapic[free].bus = bus; + ir_ioapic[free].devfn = PCI_DEVFN(path->device, path->function); + ir_ioapic[free].iommu = iommu; + ir_ioapic[free].id = scope->enumeration_id; + pr_info("IOAPIC id %d under DRHD base 0x%Lx IOMMU %d\n", + scope->enumeration_id, drhd->address, iommu->seq_id); + + return 0; +} + +static int ir_parse_ioapic_hpet_scope(struct acpi_dmar_header *header, + struct intel_iommu *iommu) +{ + int ret = 0; + struct acpi_dmar_hardware_unit *drhd; + struct acpi_dmar_device_scope *scope; + void *start, *end; + + drhd = (struct acpi_dmar_hardware_unit *)header; + start = (void *)(drhd + 1); + end = ((void *)drhd) + header->length; + + while (start < end && ret == 0) { + scope = start; + if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_IOAPIC) + ret = ir_parse_one_ioapic_scope(scope, iommu, drhd); + else if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_HPET) + ret = ir_parse_one_hpet_scope(scope, iommu, drhd); + start += scope->length; + } + + return ret; +} + +static void ir_remove_ioapic_hpet_scope(struct intel_iommu *iommu) +{ + int i; + + for (i = 0; i < MAX_HPET_TBS; i++) + if (ir_hpet[i].iommu == iommu) + ir_hpet[i].iommu = NULL; + + for (i = 0; i < MAX_IO_APICS; i++) + if (ir_ioapic[i].iommu == iommu) + ir_ioapic[i].iommu = NULL; +} + +/* + * Finds the assocaition between IOAPIC's and its Interrupt-remapping + * hardware unit. + */ +static int __init parse_ioapics_under_ir(void) +{ + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + bool ir_supported = false; + int ioapic_idx; + + for_each_iommu(iommu, drhd) + if (ecap_ir_support(iommu->ecap)) { + if (ir_parse_ioapic_hpet_scope(drhd->hdr, iommu)) + return -1; + + ir_supported = true; + } + + if (!ir_supported) + return 0; + + for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) { + int ioapic_id = mpc_ioapic_id(ioapic_idx); + if (!map_ioapic_to_ir(ioapic_id)) { + pr_err(FW_BUG "ioapic %d has no mapping iommu, " + "interrupt remapping will be disabled\n", + ioapic_id); + return -1; + } + } + + return 1; +} + +static int __init ir_dev_scope_init(void) +{ + int ret; + + if (!irq_remapping_enabled) + return 0; + + down_write(&dmar_global_lock); + ret = dmar_dev_scope_init(); + up_write(&dmar_global_lock); + + return ret; +} +rootfs_initcall(ir_dev_scope_init); + +static void disable_irq_remapping(void) +{ + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu = NULL; + + /* + * Disable Interrupt-remapping for all the DRHD's now. + */ + for_each_iommu(iommu, drhd) { + if (!ecap_ir_support(iommu->ecap)) + continue; + + iommu_disable_irq_remapping(iommu); + } +} + +static int reenable_irq_remapping(int eim) +{ + struct dmar_drhd_unit *drhd; + bool setup = false; + struct intel_iommu *iommu = NULL; + + for_each_iommu(iommu, drhd) + if (iommu->qi) + dmar_reenable_qi(iommu); + + /* + * Setup Interrupt-remapping for all the DRHD's now. + */ + for_each_iommu(iommu, drhd) { + if (!ecap_ir_support(iommu->ecap)) + continue; + + /* Set up interrupt remapping for iommu.*/ + iommu_set_irq_remapping(iommu, eim); + setup = true; + } + + if (!setup) + goto error; + + return 0; + +error: + /* + * handle error condition gracefully here! + */ + return -1; +} + +static void prepare_irte(struct irte *irte, int vector, + unsigned int dest) +{ + memset(irte, 0, sizeof(*irte)); + + irte->present = 1; + irte->dst_mode = apic->irq_dest_mode; + /* + * Trigger mode in the IRTE will always be edge, and for IO-APIC, the + * actual level or edge trigger will be setup in the IO-APIC + * RTE. This will help simplify level triggered irq migration. + * For more details, see the comments (in io_apic.c) explainig IO-APIC + * irq migration in the presence of interrupt-remapping. + */ + irte->trigger_mode = 0; + irte->dlvry_mode = apic->irq_delivery_mode; + irte->vector = vector; + irte->dest_id = IRTE_DEST(dest); + irte->redir_hint = 1; +} + +static int intel_setup_ioapic_entry(int irq, + struct IO_APIC_route_entry *route_entry, + unsigned int destination, int vector, + struct io_apic_irq_attr *attr) +{ + int ioapic_id = mpc_ioapic_id(attr->ioapic); + struct intel_iommu *iommu; + struct IR_IO_APIC_route_entry *entry; + struct irte irte; + int index; + + down_read(&dmar_global_lock); + iommu = map_ioapic_to_ir(ioapic_id); + if (!iommu) { + pr_warn("No mapping iommu for ioapic %d\n", ioapic_id); + index = -ENODEV; + } else { + index = alloc_irte(iommu, irq, 1); + if (index < 0) { + pr_warn("Failed to allocate IRTE for ioapic %d\n", + ioapic_id); + index = -ENOMEM; + } + } + up_read(&dmar_global_lock); + if (index < 0) + return index; + + prepare_irte(&irte, vector, destination); + + /* Set source-id of interrupt request */ + set_ioapic_sid(&irte, ioapic_id); + + modify_irte(irq, &irte); + + apic_printk(APIC_VERBOSE, KERN_DEBUG "IOAPIC[%d]: " + "Set IRTE entry (P:%d FPD:%d Dst_Mode:%d " + "Redir_hint:%d Trig_Mode:%d Dlvry_Mode:%X " + "Avail:%X Vector:%02X Dest:%08X " + "SID:%04X SQ:%X SVT:%X)\n", + attr->ioapic, irte.present, irte.fpd, irte.dst_mode, + irte.redir_hint, irte.trigger_mode, irte.dlvry_mode, + irte.avail, irte.vector, irte.dest_id, + irte.sid, irte.sq, irte.svt); + + entry = (struct IR_IO_APIC_route_entry *)route_entry; + memset(entry, 0, sizeof(*entry)); + + entry->index2 = (index >> 15) & 0x1; + entry->zero = 0; + entry->format = 1; + entry->index = (index & 0x7fff); + /* + * IO-APIC RTE will be configured with virtual vector. + * irq handler will do the explicit EOI to the io-apic. + */ + entry->vector = attr->ioapic_pin; + entry->mask = 0; /* enable IRQ */ + entry->trigger = attr->trigger; + entry->polarity = attr->polarity; + + /* Mask level triggered irqs. + * Use IRQ_DELAYED_DISABLE for edge triggered irqs. + */ + if (attr->trigger) + entry->mask = 1; + + return 0; +} + +/* + * Migrate the IO-APIC irq in the presence of intr-remapping. + * + * For both level and edge triggered, irq migration is a simple atomic + * update(of vector and cpu destination) of IRTE and flush the hardware cache. + * + * For level triggered, we eliminate the io-apic RTE modification (with the + * updated vector information), by using a virtual vector (io-apic pin number). + * Real vector that is used for interrupting cpu will be coming from + * the interrupt-remapping table entry. + * + * As the migration is a simple atomic update of IRTE, the same mechanism + * is used to migrate MSI irq's in the presence of interrupt-remapping. + */ +static int +intel_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, + bool force) +{ + struct irq_cfg *cfg = irqd_cfg(data); + unsigned int dest, irq = data->irq; + struct irte irte; + int err; + + if (!config_enabled(CONFIG_SMP)) + return -EINVAL; + + if (!cpumask_intersects(mask, cpu_online_mask)) + return -EINVAL; + + if (get_irte(irq, &irte)) + return -EBUSY; + + err = assign_irq_vector(irq, cfg, mask); + if (err) + return err; + + err = apic->cpu_mask_to_apicid_and(cfg->domain, mask, &dest); + if (err) { + if (assign_irq_vector(irq, cfg, data->affinity)) + pr_err("Failed to recover vector for irq %d\n", irq); + return err; + } + + irte.vector = cfg->vector; + irte.dest_id = IRTE_DEST(dest); + + /* + * Atomically updates the IRTE with the new destination, vector + * and flushes the interrupt entry cache. + */ + modify_irte(irq, &irte); + + /* + * After this point, all the interrupts will start arriving + * at the new destination. So, time to cleanup the previous + * vector allocation. + */ + if (cfg->move_in_progress) + send_cleanup_vector(cfg); + + cpumask_copy(data->affinity, mask); + return 0; +} + +static void intel_compose_msi_msg(struct pci_dev *pdev, + unsigned int irq, unsigned int dest, + struct msi_msg *msg, u8 hpet_id) +{ + struct irq_cfg *cfg; + struct irte irte; + u16 sub_handle = 0; + int ir_index; + + cfg = irq_cfg(irq); + + ir_index = map_irq_to_irte_handle(irq, &sub_handle); + BUG_ON(ir_index == -1); + + prepare_irte(&irte, cfg->vector, dest); + + /* Set source-id of interrupt request */ + if (pdev) + set_msi_sid(&irte, pdev); + else + set_hpet_sid(&irte, hpet_id); + + modify_irte(irq, &irte); + + msg->address_hi = MSI_ADDR_BASE_HI; + msg->data = sub_handle; + msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT | + MSI_ADDR_IR_SHV | + MSI_ADDR_IR_INDEX1(ir_index) | + MSI_ADDR_IR_INDEX2(ir_index); +} + +/* + * Map the PCI dev to the corresponding remapping hardware unit + * and allocate 'nvec' consecutive interrupt-remapping table entries + * in it. + */ +static int intel_msi_alloc_irq(struct pci_dev *dev, int irq, int nvec) +{ + struct intel_iommu *iommu; + int index; + + down_read(&dmar_global_lock); + iommu = map_dev_to_ir(dev); + if (!iommu) { + printk(KERN_ERR + "Unable to map PCI %s to iommu\n", pci_name(dev)); + index = -ENOENT; + } else { + index = alloc_irte(iommu, irq, nvec); + if (index < 0) { + printk(KERN_ERR + "Unable to allocate %d IRTE for PCI %s\n", + nvec, pci_name(dev)); + index = -ENOSPC; + } + } + up_read(&dmar_global_lock); + + return index; +} + +static int intel_msi_setup_irq(struct pci_dev *pdev, unsigned int irq, + int index, int sub_handle) +{ + struct intel_iommu *iommu; + int ret = -ENOENT; + + down_read(&dmar_global_lock); + iommu = map_dev_to_ir(pdev); + if (iommu) { + /* + * setup the mapping between the irq and the IRTE + * base index, the sub_handle pointing to the + * appropriate interrupt remap table entry. + */ + set_irte_irq(irq, iommu, index, sub_handle); + ret = 0; + } + up_read(&dmar_global_lock); + + return ret; +} + +static int intel_alloc_hpet_msi(unsigned int irq, unsigned int id) +{ + int ret = -1; + struct intel_iommu *iommu; + int index; + + down_read(&dmar_global_lock); + iommu = map_hpet_to_ir(id); + if (iommu) { + index = alloc_irte(iommu, irq, 1); + if (index >= 0) + ret = 0; + } + up_read(&dmar_global_lock); + + return ret; +} + +struct irq_remap_ops intel_irq_remap_ops = { + .prepare = intel_prepare_irq_remapping, + .enable = intel_enable_irq_remapping, + .disable = disable_irq_remapping, + .reenable = reenable_irq_remapping, + .enable_faulting = enable_drhd_fault_handling, + .setup_ioapic_entry = intel_setup_ioapic_entry, + .set_affinity = intel_ioapic_set_affinity, + .free_irq = free_irte, + .compose_msi_msg = intel_compose_msi_msg, + .msi_alloc_irq = intel_msi_alloc_irq, + .msi_setup_irq = intel_msi_setup_irq, + .alloc_hpet_msi = intel_alloc_hpet_msi, +}; + +/* + * Support of Interrupt Remapping Unit Hotplug + */ +static int dmar_ir_add(struct dmar_drhd_unit *dmaru, struct intel_iommu *iommu) +{ + int ret; + int eim = x2apic_enabled(); + + if (eim && !ecap_eim_support(iommu->ecap)) { + pr_info("DRHD %Lx: EIM not supported by DRHD, ecap %Lx\n", + iommu->reg_phys, iommu->ecap); + return -ENODEV; + } + + if (ir_parse_ioapic_hpet_scope(dmaru->hdr, iommu)) { + pr_warn("DRHD %Lx: failed to parse managed IOAPIC/HPET\n", + iommu->reg_phys); + return -ENODEV; + } + + /* TODO: check all IOAPICs are covered by IOMMU */ + + /* Setup Interrupt-remapping now. */ + ret = intel_setup_irq_remapping(iommu); + if (ret) { + pr_err("DRHD %Lx: failed to allocate resource\n", + iommu->reg_phys); + ir_remove_ioapic_hpet_scope(iommu); + return ret; + } + + if (!iommu->qi) { + /* Clear previous faults. */ + dmar_fault(-1, iommu); + iommu_disable_irq_remapping(iommu); + dmar_disable_qi(iommu); + } + + /* Enable queued invalidation */ + ret = dmar_enable_qi(iommu); + if (!ret) { + iommu_set_irq_remapping(iommu, eim); + } else { + pr_err("DRHD %Lx: failed to enable queued invalidation, ecap %Lx, ret %d\n", + iommu->reg_phys, iommu->ecap, ret); + intel_teardown_irq_remapping(iommu); + ir_remove_ioapic_hpet_scope(iommu); + } + + return ret; +} + +int dmar_ir_hotplug(struct dmar_drhd_unit *dmaru, bool insert) +{ + int ret = 0; + struct intel_iommu *iommu = dmaru->iommu; + + if (!irq_remapping_enabled) + return 0; + if (iommu == NULL) + return -EINVAL; + if (!ecap_ir_support(iommu->ecap)) + return 0; + + if (insert) { + if (!iommu->ir_table) + ret = dmar_ir_add(dmaru, iommu); + } else { + if (iommu->ir_table) { + if (!bitmap_empty(iommu->ir_table->bitmap, + INTR_REMAP_TABLE_ENTRIES)) { + ret = -EBUSY; + } else { + iommu_disable_irq_remapping(iommu); + intel_teardown_irq_remapping(iommu); + ir_remove_ioapic_hpet_scope(iommu); + } + } + } + + return ret; +} diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c new file mode 100644 index 000000000..4e460216b --- /dev/null +++ b/drivers/iommu/io-pgtable-arm.c @@ -0,0 +1,992 @@ +/* + * CPU-agnostic ARM page table allocator. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Copyright (C) 2014 ARM Limited + * + * Author: Will Deacon <will.deacon@arm.com> + */ + +#define pr_fmt(fmt) "arm-lpae io-pgtable: " fmt + +#include <linux/iommu.h> +#include <linux/kernel.h> +#include <linux/sizes.h> +#include <linux/slab.h> +#include <linux/types.h> + +#include "io-pgtable.h" + +#define ARM_LPAE_MAX_ADDR_BITS 48 +#define ARM_LPAE_S2_MAX_CONCAT_PAGES 16 +#define ARM_LPAE_MAX_LEVELS 4 + +/* Struct accessors */ +#define io_pgtable_to_data(x) \ + container_of((x), struct arm_lpae_io_pgtable, iop) + +#define io_pgtable_ops_to_pgtable(x) \ + container_of((x), struct io_pgtable, ops) + +#define io_pgtable_ops_to_data(x) \ + io_pgtable_to_data(io_pgtable_ops_to_pgtable(x)) + +/* + * For consistency with the architecture, we always consider + * ARM_LPAE_MAX_LEVELS levels, with the walk starting at level n >=0 + */ +#define ARM_LPAE_START_LVL(d) (ARM_LPAE_MAX_LEVELS - (d)->levels) + +/* + * Calculate the right shift amount to get to the portion describing level l + * in a virtual address mapped by the pagetable in d. + */ +#define ARM_LPAE_LVL_SHIFT(l,d) \ + ((((d)->levels - ((l) - ARM_LPAE_START_LVL(d) + 1)) \ + * (d)->bits_per_level) + (d)->pg_shift) + +#define ARM_LPAE_PAGES_PER_PGD(d) \ + DIV_ROUND_UP((d)->pgd_size, 1UL << (d)->pg_shift) + +/* + * Calculate the index at level l used to map virtual address a using the + * pagetable in d. + */ +#define ARM_LPAE_PGD_IDX(l,d) \ + ((l) == ARM_LPAE_START_LVL(d) ? ilog2(ARM_LPAE_PAGES_PER_PGD(d)) : 0) + +#define ARM_LPAE_LVL_IDX(a,l,d) \ + (((u64)(a) >> ARM_LPAE_LVL_SHIFT(l,d)) & \ + ((1 << ((d)->bits_per_level + ARM_LPAE_PGD_IDX(l,d))) - 1)) + +/* Calculate the block/page mapping size at level l for pagetable in d. */ +#define ARM_LPAE_BLOCK_SIZE(l,d) \ + (1 << (ilog2(sizeof(arm_lpae_iopte)) + \ + ((ARM_LPAE_MAX_LEVELS - (l)) * (d)->bits_per_level))) + +/* Page table bits */ +#define ARM_LPAE_PTE_TYPE_SHIFT 0 +#define ARM_LPAE_PTE_TYPE_MASK 0x3 + +#define ARM_LPAE_PTE_TYPE_BLOCK 1 +#define ARM_LPAE_PTE_TYPE_TABLE 3 +#define ARM_LPAE_PTE_TYPE_PAGE 3 + +#define ARM_LPAE_PTE_NSTABLE (((arm_lpae_iopte)1) << 63) +#define ARM_LPAE_PTE_XN (((arm_lpae_iopte)3) << 53) +#define ARM_LPAE_PTE_AF (((arm_lpae_iopte)1) << 10) +#define ARM_LPAE_PTE_SH_NS (((arm_lpae_iopte)0) << 8) +#define ARM_LPAE_PTE_SH_OS (((arm_lpae_iopte)2) << 8) +#define ARM_LPAE_PTE_SH_IS (((arm_lpae_iopte)3) << 8) +#define ARM_LPAE_PTE_NS (((arm_lpae_iopte)1) << 5) +#define ARM_LPAE_PTE_VALID (((arm_lpae_iopte)1) << 0) + +#define ARM_LPAE_PTE_ATTR_LO_MASK (((arm_lpae_iopte)0x3ff) << 2) +/* Ignore the contiguous bit for block splitting */ +#define ARM_LPAE_PTE_ATTR_HI_MASK (((arm_lpae_iopte)6) << 52) +#define ARM_LPAE_PTE_ATTR_MASK (ARM_LPAE_PTE_ATTR_LO_MASK | \ + ARM_LPAE_PTE_ATTR_HI_MASK) + +/* Stage-1 PTE */ +#define ARM_LPAE_PTE_AP_UNPRIV (((arm_lpae_iopte)1) << 6) +#define ARM_LPAE_PTE_AP_RDONLY (((arm_lpae_iopte)2) << 6) +#define ARM_LPAE_PTE_ATTRINDX_SHIFT 2 +#define ARM_LPAE_PTE_nG (((arm_lpae_iopte)1) << 11) + +/* Stage-2 PTE */ +#define ARM_LPAE_PTE_HAP_FAULT (((arm_lpae_iopte)0) << 6) +#define ARM_LPAE_PTE_HAP_READ (((arm_lpae_iopte)1) << 6) +#define ARM_LPAE_PTE_HAP_WRITE (((arm_lpae_iopte)2) << 6) +#define ARM_LPAE_PTE_MEMATTR_OIWB (((arm_lpae_iopte)0xf) << 2) +#define ARM_LPAE_PTE_MEMATTR_NC (((arm_lpae_iopte)0x5) << 2) +#define ARM_LPAE_PTE_MEMATTR_DEV (((arm_lpae_iopte)0x1) << 2) + +/* Register bits */ +#define ARM_32_LPAE_TCR_EAE (1 << 31) +#define ARM_64_LPAE_S2_TCR_RES1 (1 << 31) + +#define ARM_LPAE_TCR_EPD1 (1 << 23) + +#define ARM_LPAE_TCR_TG0_4K (0 << 14) +#define ARM_LPAE_TCR_TG0_64K (1 << 14) +#define ARM_LPAE_TCR_TG0_16K (2 << 14) + +#define ARM_LPAE_TCR_SH0_SHIFT 12 +#define ARM_LPAE_TCR_SH0_MASK 0x3 +#define ARM_LPAE_TCR_SH_NS 0 +#define ARM_LPAE_TCR_SH_OS 2 +#define ARM_LPAE_TCR_SH_IS 3 + +#define ARM_LPAE_TCR_ORGN0_SHIFT 10 +#define ARM_LPAE_TCR_IRGN0_SHIFT 8 +#define ARM_LPAE_TCR_RGN_MASK 0x3 +#define ARM_LPAE_TCR_RGN_NC 0 +#define ARM_LPAE_TCR_RGN_WBWA 1 +#define ARM_LPAE_TCR_RGN_WT 2 +#define ARM_LPAE_TCR_RGN_WB 3 + +#define ARM_LPAE_TCR_SL0_SHIFT 6 +#define ARM_LPAE_TCR_SL0_MASK 0x3 + +#define ARM_LPAE_TCR_T0SZ_SHIFT 0 +#define ARM_LPAE_TCR_SZ_MASK 0xf + +#define ARM_LPAE_TCR_PS_SHIFT 16 +#define ARM_LPAE_TCR_PS_MASK 0x7 + +#define ARM_LPAE_TCR_IPS_SHIFT 32 +#define ARM_LPAE_TCR_IPS_MASK 0x7 + +#define ARM_LPAE_TCR_PS_32_BIT 0x0ULL +#define ARM_LPAE_TCR_PS_36_BIT 0x1ULL +#define ARM_LPAE_TCR_PS_40_BIT 0x2ULL +#define ARM_LPAE_TCR_PS_42_BIT 0x3ULL +#define ARM_LPAE_TCR_PS_44_BIT 0x4ULL +#define ARM_LPAE_TCR_PS_48_BIT 0x5ULL + +#define ARM_LPAE_MAIR_ATTR_SHIFT(n) ((n) << 3) +#define ARM_LPAE_MAIR_ATTR_MASK 0xff +#define ARM_LPAE_MAIR_ATTR_DEVICE 0x04 +#define ARM_LPAE_MAIR_ATTR_NC 0x44 +#define ARM_LPAE_MAIR_ATTR_WBRWA 0xff +#define ARM_LPAE_MAIR_ATTR_IDX_NC 0 +#define ARM_LPAE_MAIR_ATTR_IDX_CACHE 1 +#define ARM_LPAE_MAIR_ATTR_IDX_DEV 2 + +/* IOPTE accessors */ +#define iopte_deref(pte,d) \ + (__va((pte) & ((1ULL << ARM_LPAE_MAX_ADDR_BITS) - 1) \ + & ~((1ULL << (d)->pg_shift) - 1))) + +#define iopte_type(pte,l) \ + (((pte) >> ARM_LPAE_PTE_TYPE_SHIFT) & ARM_LPAE_PTE_TYPE_MASK) + +#define iopte_prot(pte) ((pte) & ARM_LPAE_PTE_ATTR_MASK) + +#define iopte_leaf(pte,l) \ + (l == (ARM_LPAE_MAX_LEVELS - 1) ? \ + (iopte_type(pte,l) == ARM_LPAE_PTE_TYPE_PAGE) : \ + (iopte_type(pte,l) == ARM_LPAE_PTE_TYPE_BLOCK)) + +#define iopte_to_pfn(pte,d) \ + (((pte) & ((1ULL << ARM_LPAE_MAX_ADDR_BITS) - 1)) >> (d)->pg_shift) + +#define pfn_to_iopte(pfn,d) \ + (((pfn) << (d)->pg_shift) & ((1ULL << ARM_LPAE_MAX_ADDR_BITS) - 1)) + +struct arm_lpae_io_pgtable { + struct io_pgtable iop; + + int levels; + size_t pgd_size; + unsigned long pg_shift; + unsigned long bits_per_level; + + void *pgd; +}; + +typedef u64 arm_lpae_iopte; + +static bool selftest_running = false; + +static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data, + unsigned long iova, phys_addr_t paddr, + arm_lpae_iopte prot, int lvl, + arm_lpae_iopte *ptep) +{ + arm_lpae_iopte pte = prot; + + /* We require an unmap first */ + if (iopte_leaf(*ptep, lvl)) { + WARN_ON(!selftest_running); + return -EEXIST; + } + + if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_NS) + pte |= ARM_LPAE_PTE_NS; + + if (lvl == ARM_LPAE_MAX_LEVELS - 1) + pte |= ARM_LPAE_PTE_TYPE_PAGE; + else + pte |= ARM_LPAE_PTE_TYPE_BLOCK; + + pte |= ARM_LPAE_PTE_AF | ARM_LPAE_PTE_SH_IS; + pte |= pfn_to_iopte(paddr >> data->pg_shift, data); + + *ptep = pte; + data->iop.cfg.tlb->flush_pgtable(ptep, sizeof(*ptep), data->iop.cookie); + return 0; +} + +static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova, + phys_addr_t paddr, size_t size, arm_lpae_iopte prot, + int lvl, arm_lpae_iopte *ptep) +{ + arm_lpae_iopte *cptep, pte; + void *cookie = data->iop.cookie; + size_t block_size = ARM_LPAE_BLOCK_SIZE(lvl, data); + + /* Find our entry at the current level */ + ptep += ARM_LPAE_LVL_IDX(iova, lvl, data); + + /* If we can install a leaf entry at this level, then do so */ + if (size == block_size && (size & data->iop.cfg.pgsize_bitmap)) + return arm_lpae_init_pte(data, iova, paddr, prot, lvl, ptep); + + /* We can't allocate tables at the final level */ + if (WARN_ON(lvl >= ARM_LPAE_MAX_LEVELS - 1)) + return -EINVAL; + + /* Grab a pointer to the next level */ + pte = *ptep; + if (!pte) { + cptep = alloc_pages_exact(1UL << data->pg_shift, + GFP_ATOMIC | __GFP_ZERO); + if (!cptep) + return -ENOMEM; + + data->iop.cfg.tlb->flush_pgtable(cptep, 1UL << data->pg_shift, + cookie); + pte = __pa(cptep) | ARM_LPAE_PTE_TYPE_TABLE; + if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_NS) + pte |= ARM_LPAE_PTE_NSTABLE; + *ptep = pte; + data->iop.cfg.tlb->flush_pgtable(ptep, sizeof(*ptep), cookie); + } else { + cptep = iopte_deref(pte, data); + } + + /* Rinse, repeat */ + return __arm_lpae_map(data, iova, paddr, size, prot, lvl + 1, cptep); +} + +static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data, + int prot) +{ + arm_lpae_iopte pte; + + if (data->iop.fmt == ARM_64_LPAE_S1 || + data->iop.fmt == ARM_32_LPAE_S1) { + pte = ARM_LPAE_PTE_AP_UNPRIV | ARM_LPAE_PTE_nG; + + if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ)) + pte |= ARM_LPAE_PTE_AP_RDONLY; + + if (prot & IOMMU_CACHE) + pte |= (ARM_LPAE_MAIR_ATTR_IDX_CACHE + << ARM_LPAE_PTE_ATTRINDX_SHIFT); + } else { + pte = ARM_LPAE_PTE_HAP_FAULT; + if (prot & IOMMU_READ) + pte |= ARM_LPAE_PTE_HAP_READ; + if (prot & IOMMU_WRITE) + pte |= ARM_LPAE_PTE_HAP_WRITE; + if (prot & IOMMU_CACHE) + pte |= ARM_LPAE_PTE_MEMATTR_OIWB; + else + pte |= ARM_LPAE_PTE_MEMATTR_NC; + } + + if (prot & IOMMU_NOEXEC) + pte |= ARM_LPAE_PTE_XN; + + return pte; +} + +static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova, + phys_addr_t paddr, size_t size, int iommu_prot) +{ + struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); + arm_lpae_iopte *ptep = data->pgd; + int lvl = ARM_LPAE_START_LVL(data); + arm_lpae_iopte prot; + + /* If no access, then nothing to do */ + if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE))) + return 0; + + prot = arm_lpae_prot_to_pte(data, iommu_prot); + return __arm_lpae_map(data, iova, paddr, size, prot, lvl, ptep); +} + +static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl, + arm_lpae_iopte *ptep) +{ + arm_lpae_iopte *start, *end; + unsigned long table_size; + + /* Only leaf entries at the last level */ + if (lvl == ARM_LPAE_MAX_LEVELS - 1) + return; + + if (lvl == ARM_LPAE_START_LVL(data)) + table_size = data->pgd_size; + else + table_size = 1UL << data->pg_shift; + + start = ptep; + end = (void *)ptep + table_size; + + while (ptep != end) { + arm_lpae_iopte pte = *ptep++; + + if (!pte || iopte_leaf(pte, lvl)) + continue; + + __arm_lpae_free_pgtable(data, lvl + 1, iopte_deref(pte, data)); + } + + free_pages_exact(start, table_size); +} + +static void arm_lpae_free_pgtable(struct io_pgtable *iop) +{ + struct arm_lpae_io_pgtable *data = io_pgtable_to_data(iop); + + __arm_lpae_free_pgtable(data, ARM_LPAE_START_LVL(data), data->pgd); + kfree(data); +} + +static int arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data, + unsigned long iova, size_t size, + arm_lpae_iopte prot, int lvl, + arm_lpae_iopte *ptep, size_t blk_size) +{ + unsigned long blk_start, blk_end; + phys_addr_t blk_paddr; + arm_lpae_iopte table = 0; + void *cookie = data->iop.cookie; + const struct iommu_gather_ops *tlb = data->iop.cfg.tlb; + + blk_start = iova & ~(blk_size - 1); + blk_end = blk_start + blk_size; + blk_paddr = iopte_to_pfn(*ptep, data) << data->pg_shift; + + for (; blk_start < blk_end; blk_start += size, blk_paddr += size) { + arm_lpae_iopte *tablep; + + /* Unmap! */ + if (blk_start == iova) + continue; + + /* __arm_lpae_map expects a pointer to the start of the table */ + tablep = &table - ARM_LPAE_LVL_IDX(blk_start, lvl, data); + if (__arm_lpae_map(data, blk_start, blk_paddr, size, prot, lvl, + tablep) < 0) { + if (table) { + /* Free the table we allocated */ + tablep = iopte_deref(table, data); + __arm_lpae_free_pgtable(data, lvl + 1, tablep); + } + return 0; /* Bytes unmapped */ + } + } + + *ptep = table; + tlb->flush_pgtable(ptep, sizeof(*ptep), cookie); + iova &= ~(blk_size - 1); + tlb->tlb_add_flush(iova, blk_size, true, cookie); + return size; +} + +static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, + unsigned long iova, size_t size, int lvl, + arm_lpae_iopte *ptep) +{ + arm_lpae_iopte pte; + const struct iommu_gather_ops *tlb = data->iop.cfg.tlb; + void *cookie = data->iop.cookie; + size_t blk_size = ARM_LPAE_BLOCK_SIZE(lvl, data); + + ptep += ARM_LPAE_LVL_IDX(iova, lvl, data); + pte = *ptep; + + /* Something went horribly wrong and we ran out of page table */ + if (WARN_ON(!pte || (lvl == ARM_LPAE_MAX_LEVELS))) + return 0; + + /* If the size matches this level, we're in the right place */ + if (size == blk_size) { + *ptep = 0; + tlb->flush_pgtable(ptep, sizeof(*ptep), cookie); + + if (!iopte_leaf(pte, lvl)) { + /* Also flush any partial walks */ + tlb->tlb_add_flush(iova, size, false, cookie); + tlb->tlb_sync(data->iop.cookie); + ptep = iopte_deref(pte, data); + __arm_lpae_free_pgtable(data, lvl + 1, ptep); + } else { + tlb->tlb_add_flush(iova, size, true, cookie); + } + + return size; + } else if (iopte_leaf(pte, lvl)) { + /* + * Insert a table at the next level to map the old region, + * minus the part we want to unmap + */ + return arm_lpae_split_blk_unmap(data, iova, size, + iopte_prot(pte), lvl, ptep, + blk_size); + } + + /* Keep on walkin' */ + ptep = iopte_deref(pte, data); + return __arm_lpae_unmap(data, iova, size, lvl + 1, ptep); +} + +static int arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova, + size_t size) +{ + size_t unmapped; + struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); + struct io_pgtable *iop = &data->iop; + arm_lpae_iopte *ptep = data->pgd; + int lvl = ARM_LPAE_START_LVL(data); + + unmapped = __arm_lpae_unmap(data, iova, size, lvl, ptep); + if (unmapped) + iop->cfg.tlb->tlb_sync(iop->cookie); + + return unmapped; +} + +static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops, + unsigned long iova) +{ + struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); + arm_lpae_iopte pte, *ptep = data->pgd; + int lvl = ARM_LPAE_START_LVL(data); + + do { + /* Valid IOPTE pointer? */ + if (!ptep) + return 0; + + /* Grab the IOPTE we're interested in */ + pte = *(ptep + ARM_LPAE_LVL_IDX(iova, lvl, data)); + + /* Valid entry? */ + if (!pte) + return 0; + + /* Leaf entry? */ + if (iopte_leaf(pte,lvl)) + goto found_translation; + + /* Take it to the next level */ + ptep = iopte_deref(pte, data); + } while (++lvl < ARM_LPAE_MAX_LEVELS); + + /* Ran out of page tables to walk */ + return 0; + +found_translation: + iova &= ((1 << data->pg_shift) - 1); + return ((phys_addr_t)iopte_to_pfn(pte,data) << data->pg_shift) | iova; +} + +static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg) +{ + unsigned long granule; + + /* + * We need to restrict the supported page sizes to match the + * translation regime for a particular granule. Aim to match + * the CPU page size if possible, otherwise prefer smaller sizes. + * While we're at it, restrict the block sizes to match the + * chosen granule. + */ + if (cfg->pgsize_bitmap & PAGE_SIZE) + granule = PAGE_SIZE; + else if (cfg->pgsize_bitmap & ~PAGE_MASK) + granule = 1UL << __fls(cfg->pgsize_bitmap & ~PAGE_MASK); + else if (cfg->pgsize_bitmap & PAGE_MASK) + granule = 1UL << __ffs(cfg->pgsize_bitmap & PAGE_MASK); + else + granule = 0; + + switch (granule) { + case SZ_4K: + cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G); + break; + case SZ_16K: + cfg->pgsize_bitmap &= (SZ_16K | SZ_32M); + break; + case SZ_64K: + cfg->pgsize_bitmap &= (SZ_64K | SZ_512M); + break; + default: + cfg->pgsize_bitmap = 0; + } +} + +static struct arm_lpae_io_pgtable * +arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg) +{ + unsigned long va_bits, pgd_bits; + struct arm_lpae_io_pgtable *data; + + arm_lpae_restrict_pgsizes(cfg); + + if (!(cfg->pgsize_bitmap & (SZ_4K | SZ_16K | SZ_64K))) + return NULL; + + if (cfg->ias > ARM_LPAE_MAX_ADDR_BITS) + return NULL; + + if (cfg->oas > ARM_LPAE_MAX_ADDR_BITS) + return NULL; + + data = kmalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return NULL; + + data->pg_shift = __ffs(cfg->pgsize_bitmap); + data->bits_per_level = data->pg_shift - ilog2(sizeof(arm_lpae_iopte)); + + va_bits = cfg->ias - data->pg_shift; + data->levels = DIV_ROUND_UP(va_bits, data->bits_per_level); + + /* Calculate the actual size of our pgd (without concatenation) */ + pgd_bits = va_bits - (data->bits_per_level * (data->levels - 1)); + data->pgd_size = 1UL << (pgd_bits + ilog2(sizeof(arm_lpae_iopte))); + + data->iop.ops = (struct io_pgtable_ops) { + .map = arm_lpae_map, + .unmap = arm_lpae_unmap, + .iova_to_phys = arm_lpae_iova_to_phys, + }; + + return data; +} + +static struct io_pgtable * +arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) +{ + u64 reg; + struct arm_lpae_io_pgtable *data = arm_lpae_alloc_pgtable(cfg); + + if (!data) + return NULL; + + /* TCR */ + reg = (ARM_LPAE_TCR_SH_IS << ARM_LPAE_TCR_SH0_SHIFT) | + (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_IRGN0_SHIFT) | + (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_ORGN0_SHIFT); + + switch (1 << data->pg_shift) { + case SZ_4K: + reg |= ARM_LPAE_TCR_TG0_4K; + break; + case SZ_16K: + reg |= ARM_LPAE_TCR_TG0_16K; + break; + case SZ_64K: + reg |= ARM_LPAE_TCR_TG0_64K; + break; + } + + switch (cfg->oas) { + case 32: + reg |= (ARM_LPAE_TCR_PS_32_BIT << ARM_LPAE_TCR_IPS_SHIFT); + break; + case 36: + reg |= (ARM_LPAE_TCR_PS_36_BIT << ARM_LPAE_TCR_IPS_SHIFT); + break; + case 40: + reg |= (ARM_LPAE_TCR_PS_40_BIT << ARM_LPAE_TCR_IPS_SHIFT); + break; + case 42: + reg |= (ARM_LPAE_TCR_PS_42_BIT << ARM_LPAE_TCR_IPS_SHIFT); + break; + case 44: + reg |= (ARM_LPAE_TCR_PS_44_BIT << ARM_LPAE_TCR_IPS_SHIFT); + break; + case 48: + reg |= (ARM_LPAE_TCR_PS_48_BIT << ARM_LPAE_TCR_IPS_SHIFT); + break; + default: + goto out_free_data; + } + + reg |= (64ULL - cfg->ias) << ARM_LPAE_TCR_T0SZ_SHIFT; + + /* Disable speculative walks through TTBR1 */ + reg |= ARM_LPAE_TCR_EPD1; + cfg->arm_lpae_s1_cfg.tcr = reg; + + /* MAIRs */ + reg = (ARM_LPAE_MAIR_ATTR_NC + << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_NC)) | + (ARM_LPAE_MAIR_ATTR_WBRWA + << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_CACHE)) | + (ARM_LPAE_MAIR_ATTR_DEVICE + << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_DEV)); + + cfg->arm_lpae_s1_cfg.mair[0] = reg; + cfg->arm_lpae_s1_cfg.mair[1] = 0; + + /* Looking good; allocate a pgd */ + data->pgd = alloc_pages_exact(data->pgd_size, GFP_KERNEL | __GFP_ZERO); + if (!data->pgd) + goto out_free_data; + + cfg->tlb->flush_pgtable(data->pgd, data->pgd_size, cookie); + + /* TTBRs */ + cfg->arm_lpae_s1_cfg.ttbr[0] = virt_to_phys(data->pgd); + cfg->arm_lpae_s1_cfg.ttbr[1] = 0; + return &data->iop; + +out_free_data: + kfree(data); + return NULL; +} + +static struct io_pgtable * +arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie) +{ + u64 reg, sl; + struct arm_lpae_io_pgtable *data = arm_lpae_alloc_pgtable(cfg); + + if (!data) + return NULL; + + /* + * Concatenate PGDs at level 1 if possible in order to reduce + * the depth of the stage-2 walk. + */ + if (data->levels == ARM_LPAE_MAX_LEVELS) { + unsigned long pgd_pages; + + pgd_pages = data->pgd_size >> ilog2(sizeof(arm_lpae_iopte)); + if (pgd_pages <= ARM_LPAE_S2_MAX_CONCAT_PAGES) { + data->pgd_size = pgd_pages << data->pg_shift; + data->levels--; + } + } + + /* VTCR */ + reg = ARM_64_LPAE_S2_TCR_RES1 | + (ARM_LPAE_TCR_SH_IS << ARM_LPAE_TCR_SH0_SHIFT) | + (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_IRGN0_SHIFT) | + (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_ORGN0_SHIFT); + + sl = ARM_LPAE_START_LVL(data); + + switch (1 << data->pg_shift) { + case SZ_4K: + reg |= ARM_LPAE_TCR_TG0_4K; + sl++; /* SL0 format is different for 4K granule size */ + break; + case SZ_16K: + reg |= ARM_LPAE_TCR_TG0_16K; + break; + case SZ_64K: + reg |= ARM_LPAE_TCR_TG0_64K; + break; + } + + switch (cfg->oas) { + case 32: + reg |= (ARM_LPAE_TCR_PS_32_BIT << ARM_LPAE_TCR_PS_SHIFT); + break; + case 36: + reg |= (ARM_LPAE_TCR_PS_36_BIT << ARM_LPAE_TCR_PS_SHIFT); + break; + case 40: + reg |= (ARM_LPAE_TCR_PS_40_BIT << ARM_LPAE_TCR_PS_SHIFT); + break; + case 42: + reg |= (ARM_LPAE_TCR_PS_42_BIT << ARM_LPAE_TCR_PS_SHIFT); + break; + case 44: + reg |= (ARM_LPAE_TCR_PS_44_BIT << ARM_LPAE_TCR_PS_SHIFT); + break; + case 48: + reg |= (ARM_LPAE_TCR_PS_48_BIT << ARM_LPAE_TCR_PS_SHIFT); + break; + default: + goto out_free_data; + } + + reg |= (64ULL - cfg->ias) << ARM_LPAE_TCR_T0SZ_SHIFT; + reg |= (~sl & ARM_LPAE_TCR_SL0_MASK) << ARM_LPAE_TCR_SL0_SHIFT; + cfg->arm_lpae_s2_cfg.vtcr = reg; + + /* Allocate pgd pages */ + data->pgd = alloc_pages_exact(data->pgd_size, GFP_KERNEL | __GFP_ZERO); + if (!data->pgd) + goto out_free_data; + + cfg->tlb->flush_pgtable(data->pgd, data->pgd_size, cookie); + + /* VTTBR */ + cfg->arm_lpae_s2_cfg.vttbr = virt_to_phys(data->pgd); + return &data->iop; + +out_free_data: + kfree(data); + return NULL; +} + +static struct io_pgtable * +arm_32_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) +{ + struct io_pgtable *iop; + + if (cfg->ias > 32 || cfg->oas > 40) + return NULL; + + cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G); + iop = arm_64_lpae_alloc_pgtable_s1(cfg, cookie); + if (iop) { + cfg->arm_lpae_s1_cfg.tcr |= ARM_32_LPAE_TCR_EAE; + cfg->arm_lpae_s1_cfg.tcr &= 0xffffffff; + } + + return iop; +} + +static struct io_pgtable * +arm_32_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie) +{ + struct io_pgtable *iop; + + if (cfg->ias > 40 || cfg->oas > 40) + return NULL; + + cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G); + iop = arm_64_lpae_alloc_pgtable_s2(cfg, cookie); + if (iop) + cfg->arm_lpae_s2_cfg.vtcr &= 0xffffffff; + + return iop; +} + +struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s1_init_fns = { + .alloc = arm_64_lpae_alloc_pgtable_s1, + .free = arm_lpae_free_pgtable, +}; + +struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s2_init_fns = { + .alloc = arm_64_lpae_alloc_pgtable_s2, + .free = arm_lpae_free_pgtable, +}; + +struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s1_init_fns = { + .alloc = arm_32_lpae_alloc_pgtable_s1, + .free = arm_lpae_free_pgtable, +}; + +struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s2_init_fns = { + .alloc = arm_32_lpae_alloc_pgtable_s2, + .free = arm_lpae_free_pgtable, +}; + +#ifdef CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST + +static struct io_pgtable_cfg *cfg_cookie; + +static void dummy_tlb_flush_all(void *cookie) +{ + WARN_ON(cookie != cfg_cookie); +} + +static void dummy_tlb_add_flush(unsigned long iova, size_t size, bool leaf, + void *cookie) +{ + WARN_ON(cookie != cfg_cookie); + WARN_ON(!(size & cfg_cookie->pgsize_bitmap)); +} + +static void dummy_tlb_sync(void *cookie) +{ + WARN_ON(cookie != cfg_cookie); +} + +static void dummy_flush_pgtable(void *ptr, size_t size, void *cookie) +{ + WARN_ON(cookie != cfg_cookie); +} + +static struct iommu_gather_ops dummy_tlb_ops __initdata = { + .tlb_flush_all = dummy_tlb_flush_all, + .tlb_add_flush = dummy_tlb_add_flush, + .tlb_sync = dummy_tlb_sync, + .flush_pgtable = dummy_flush_pgtable, +}; + +static void __init arm_lpae_dump_ops(struct io_pgtable_ops *ops) +{ + struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); + struct io_pgtable_cfg *cfg = &data->iop.cfg; + + pr_err("cfg: pgsize_bitmap 0x%lx, ias %u-bit\n", + cfg->pgsize_bitmap, cfg->ias); + pr_err("data: %d levels, 0x%zx pgd_size, %lu pg_shift, %lu bits_per_level, pgd @ %p\n", + data->levels, data->pgd_size, data->pg_shift, + data->bits_per_level, data->pgd); +} + +#define __FAIL(ops, i) ({ \ + WARN(1, "selftest: test failed for fmt idx %d\n", (i)); \ + arm_lpae_dump_ops(ops); \ + selftest_running = false; \ + -EFAULT; \ +}) + +static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg) +{ + static const enum io_pgtable_fmt fmts[] = { + ARM_64_LPAE_S1, + ARM_64_LPAE_S2, + }; + + int i, j; + unsigned long iova; + size_t size; + struct io_pgtable_ops *ops; + + selftest_running = true; + + for (i = 0; i < ARRAY_SIZE(fmts); ++i) { + cfg_cookie = cfg; + ops = alloc_io_pgtable_ops(fmts[i], cfg, cfg); + if (!ops) { + pr_err("selftest: failed to allocate io pgtable ops\n"); + return -ENOMEM; + } + + /* + * Initial sanity checks. + * Empty page tables shouldn't provide any translations. + */ + if (ops->iova_to_phys(ops, 42)) + return __FAIL(ops, i); + + if (ops->iova_to_phys(ops, SZ_1G + 42)) + return __FAIL(ops, i); + + if (ops->iova_to_phys(ops, SZ_2G + 42)) + return __FAIL(ops, i); + + /* + * Distinct mappings of different granule sizes. + */ + iova = 0; + j = find_first_bit(&cfg->pgsize_bitmap, BITS_PER_LONG); + while (j != BITS_PER_LONG) { + size = 1UL << j; + + if (ops->map(ops, iova, iova, size, IOMMU_READ | + IOMMU_WRITE | + IOMMU_NOEXEC | + IOMMU_CACHE)) + return __FAIL(ops, i); + + /* Overlapping mappings */ + if (!ops->map(ops, iova, iova + size, size, + IOMMU_READ | IOMMU_NOEXEC)) + return __FAIL(ops, i); + + if (ops->iova_to_phys(ops, iova + 42) != (iova + 42)) + return __FAIL(ops, i); + + iova += SZ_1G; + j++; + j = find_next_bit(&cfg->pgsize_bitmap, BITS_PER_LONG, j); + } + + /* Partial unmap */ + size = 1UL << __ffs(cfg->pgsize_bitmap); + if (ops->unmap(ops, SZ_1G + size, size) != size) + return __FAIL(ops, i); + + /* Remap of partial unmap */ + if (ops->map(ops, SZ_1G + size, size, size, IOMMU_READ)) + return __FAIL(ops, i); + + if (ops->iova_to_phys(ops, SZ_1G + size + 42) != (size + 42)) + return __FAIL(ops, i); + + /* Full unmap */ + iova = 0; + j = find_first_bit(&cfg->pgsize_bitmap, BITS_PER_LONG); + while (j != BITS_PER_LONG) { + size = 1UL << j; + + if (ops->unmap(ops, iova, size) != size) + return __FAIL(ops, i); + + if (ops->iova_to_phys(ops, iova + 42)) + return __FAIL(ops, i); + + /* Remap full block */ + if (ops->map(ops, iova, iova, size, IOMMU_WRITE)) + return __FAIL(ops, i); + + if (ops->iova_to_phys(ops, iova + 42) != (iova + 42)) + return __FAIL(ops, i); + + iova += SZ_1G; + j++; + j = find_next_bit(&cfg->pgsize_bitmap, BITS_PER_LONG, j); + } + + free_io_pgtable_ops(ops); + } + + selftest_running = false; + return 0; +} + +static int __init arm_lpae_do_selftests(void) +{ + static const unsigned long pgsize[] = { + SZ_4K | SZ_2M | SZ_1G, + SZ_16K | SZ_32M, + SZ_64K | SZ_512M, + }; + + static const unsigned int ias[] = { + 32, 36, 40, 42, 44, 48, + }; + + int i, j, pass = 0, fail = 0; + struct io_pgtable_cfg cfg = { + .tlb = &dummy_tlb_ops, + .oas = 48, + }; + + for (i = 0; i < ARRAY_SIZE(pgsize); ++i) { + for (j = 0; j < ARRAY_SIZE(ias); ++j) { + cfg.pgsize_bitmap = pgsize[i]; + cfg.ias = ias[j]; + pr_info("selftest: pgsize_bitmap 0x%08lx, IAS %u\n", + pgsize[i], ias[j]); + if (arm_lpae_run_tests(&cfg)) + fail++; + else + pass++; + } + } + + pr_info("selftest: completed with %d PASS %d FAIL\n", pass, fail); + return fail ? -EFAULT : 0; +} +subsys_initcall(arm_lpae_do_selftests); +#endif diff --git a/drivers/iommu/io-pgtable.c b/drivers/iommu/io-pgtable.c new file mode 100644 index 000000000..6436fe24b --- /dev/null +++ b/drivers/iommu/io-pgtable.c @@ -0,0 +1,82 @@ +/* + * Generic page table allocator for IOMMUs. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Copyright (C) 2014 ARM Limited + * + * Author: Will Deacon <will.deacon@arm.com> + */ + +#include <linux/bug.h> +#include <linux/kernel.h> +#include <linux/types.h> + +#include "io-pgtable.h" + +extern struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s1_init_fns; +extern struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s2_init_fns; +extern struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s1_init_fns; +extern struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s2_init_fns; + +static const struct io_pgtable_init_fns * +io_pgtable_init_table[IO_PGTABLE_NUM_FMTS] = +{ +#ifdef CONFIG_IOMMU_IO_PGTABLE_LPAE + [ARM_32_LPAE_S1] = &io_pgtable_arm_32_lpae_s1_init_fns, + [ARM_32_LPAE_S2] = &io_pgtable_arm_32_lpae_s2_init_fns, + [ARM_64_LPAE_S1] = &io_pgtable_arm_64_lpae_s1_init_fns, + [ARM_64_LPAE_S2] = &io_pgtable_arm_64_lpae_s2_init_fns, +#endif +}; + +struct io_pgtable_ops *alloc_io_pgtable_ops(enum io_pgtable_fmt fmt, + struct io_pgtable_cfg *cfg, + void *cookie) +{ + struct io_pgtable *iop; + const struct io_pgtable_init_fns *fns; + + if (fmt >= IO_PGTABLE_NUM_FMTS) + return NULL; + + fns = io_pgtable_init_table[fmt]; + if (!fns) + return NULL; + + iop = fns->alloc(cfg, cookie); + if (!iop) + return NULL; + + iop->fmt = fmt; + iop->cookie = cookie; + iop->cfg = *cfg; + + return &iop->ops; +} + +/* + * It is the IOMMU driver's responsibility to ensure that the page table + * is no longer accessible to the walker by this point. + */ +void free_io_pgtable_ops(struct io_pgtable_ops *ops) +{ + struct io_pgtable *iop; + + if (!ops) + return; + + iop = container_of(ops, struct io_pgtable, ops); + iop->cfg.tlb->tlb_flush_all(iop->cookie); + io_pgtable_init_table[iop->fmt]->free(iop); +} diff --git a/drivers/iommu/io-pgtable.h b/drivers/iommu/io-pgtable.h new file mode 100644 index 000000000..10e32f69c --- /dev/null +++ b/drivers/iommu/io-pgtable.h @@ -0,0 +1,143 @@ +#ifndef __IO_PGTABLE_H +#define __IO_PGTABLE_H + +/* + * Public API for use by IOMMU drivers + */ +enum io_pgtable_fmt { + ARM_32_LPAE_S1, + ARM_32_LPAE_S2, + ARM_64_LPAE_S1, + ARM_64_LPAE_S2, + IO_PGTABLE_NUM_FMTS, +}; + +/** + * struct iommu_gather_ops - IOMMU callbacks for TLB and page table management. + * + * @tlb_flush_all: Synchronously invalidate the entire TLB context. + * @tlb_add_flush: Queue up a TLB invalidation for a virtual address range. + * @tlb_sync: Ensure any queue TLB invalidation has taken effect. + * @flush_pgtable: Ensure page table updates are visible to the IOMMU. + * + * Note that these can all be called in atomic context and must therefore + * not block. + */ +struct iommu_gather_ops { + void (*tlb_flush_all)(void *cookie); + void (*tlb_add_flush)(unsigned long iova, size_t size, bool leaf, + void *cookie); + void (*tlb_sync)(void *cookie); + void (*flush_pgtable)(void *ptr, size_t size, void *cookie); +}; + +/** + * struct io_pgtable_cfg - Configuration data for a set of page tables. + * + * @quirks: A bitmap of hardware quirks that require some special + * action by the low-level page table allocator. + * @pgsize_bitmap: A bitmap of page sizes supported by this set of page + * tables. + * @ias: Input address (iova) size, in bits. + * @oas: Output address (paddr) size, in bits. + * @tlb: TLB management callbacks for this set of tables. + */ +struct io_pgtable_cfg { + #define IO_PGTABLE_QUIRK_ARM_NS (1 << 0) /* Set NS bit in PTEs */ + int quirks; + unsigned long pgsize_bitmap; + unsigned int ias; + unsigned int oas; + const struct iommu_gather_ops *tlb; + + /* Low-level data specific to the table format */ + union { + struct { + u64 ttbr[2]; + u64 tcr; + u64 mair[2]; + } arm_lpae_s1_cfg; + + struct { + u64 vttbr; + u64 vtcr; + } arm_lpae_s2_cfg; + }; +}; + +/** + * struct io_pgtable_ops - Page table manipulation API for IOMMU drivers. + * + * @map: Map a physically contiguous memory region. + * @unmap: Unmap a physically contiguous memory region. + * @iova_to_phys: Translate iova to physical address. + * + * These functions map directly onto the iommu_ops member functions with + * the same names. + */ +struct io_pgtable_ops { + int (*map)(struct io_pgtable_ops *ops, unsigned long iova, + phys_addr_t paddr, size_t size, int prot); + int (*unmap)(struct io_pgtable_ops *ops, unsigned long iova, + size_t size); + phys_addr_t (*iova_to_phys)(struct io_pgtable_ops *ops, + unsigned long iova); +}; + +/** + * alloc_io_pgtable_ops() - Allocate a page table allocator for use by an IOMMU. + * + * @fmt: The page table format. + * @cfg: The page table configuration. This will be modified to represent + * the configuration actually provided by the allocator (e.g. the + * pgsize_bitmap may be restricted). + * @cookie: An opaque token provided by the IOMMU driver and passed back to + * the callback routines in cfg->tlb. + */ +struct io_pgtable_ops *alloc_io_pgtable_ops(enum io_pgtable_fmt fmt, + struct io_pgtable_cfg *cfg, + void *cookie); + +/** + * free_io_pgtable_ops() - Free an io_pgtable_ops structure. The caller + * *must* ensure that the page table is no longer + * live, but the TLB can be dirty. + * + * @ops: The ops returned from alloc_io_pgtable_ops. + */ +void free_io_pgtable_ops(struct io_pgtable_ops *ops); + + +/* + * Internal structures for page table allocator implementations. + */ + +/** + * struct io_pgtable - Internal structure describing a set of page tables. + * + * @fmt: The page table format. + * @cookie: An opaque token provided by the IOMMU driver and passed back to + * any callback routines. + * @cfg: A copy of the page table configuration. + * @ops: The page table operations in use for this set of page tables. + */ +struct io_pgtable { + enum io_pgtable_fmt fmt; + void *cookie; + struct io_pgtable_cfg cfg; + struct io_pgtable_ops ops; +}; + +/** + * struct io_pgtable_init_fns - Alloc/free a set of page tables for a + * particular format. + * + * @alloc: Allocate a set of page tables described by cfg. + * @free: Free the page tables associated with iop. + */ +struct io_pgtable_init_fns { + struct io_pgtable *(*alloc)(struct io_pgtable_cfg *cfg, void *cookie); + void (*free)(struct io_pgtable *iop); +}; + +#endif /* __IO_PGTABLE_H */ diff --git a/drivers/iommu/iommu-sysfs.c b/drivers/iommu/iommu-sysfs.c new file mode 100644 index 000000000..39b2d9127 --- /dev/null +++ b/drivers/iommu/iommu-sysfs.c @@ -0,0 +1,134 @@ +/* + * IOMMU sysfs class support + * + * Copyright (C) 2014 Red Hat, Inc. All rights reserved. + * Author: Alex Williamson <alex.williamson@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/device.h> +#include <linux/iommu.h> +#include <linux/module.h> +#include <linux/slab.h> + +/* + * We provide a common class "devices" group which initially has no attributes. + * As devices are added to the IOMMU, we'll add links to the group. + */ +static struct attribute *devices_attr[] = { + NULL, +}; + +static const struct attribute_group iommu_devices_attr_group = { + .name = "devices", + .attrs = devices_attr, +}; + +static const struct attribute_group *iommu_dev_groups[] = { + &iommu_devices_attr_group, + NULL, +}; + +static void iommu_release_device(struct device *dev) +{ + kfree(dev); +} + +static struct class iommu_class = { + .name = "iommu", + .dev_release = iommu_release_device, + .dev_groups = iommu_dev_groups, +}; + +static int __init iommu_dev_init(void) +{ + return class_register(&iommu_class); +} +postcore_initcall(iommu_dev_init); + +/* + * Create an IOMMU device and return a pointer to it. IOMMU specific + * attributes can be provided as an attribute group, allowing a unique + * namespace per IOMMU type. + */ +struct device *iommu_device_create(struct device *parent, void *drvdata, + const struct attribute_group **groups, + const char *fmt, ...) +{ + struct device *dev; + va_list vargs; + int ret; + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return ERR_PTR(-ENOMEM); + + device_initialize(dev); + + dev->class = &iommu_class; + dev->parent = parent; + dev->groups = groups; + dev_set_drvdata(dev, drvdata); + + va_start(vargs, fmt); + ret = kobject_set_name_vargs(&dev->kobj, fmt, vargs); + va_end(vargs); + if (ret) + goto error; + + ret = device_add(dev); + if (ret) + goto error; + + return dev; + +error: + put_device(dev); + return ERR_PTR(ret); +} + +void iommu_device_destroy(struct device *dev) +{ + if (!dev || IS_ERR(dev)) + return; + + device_unregister(dev); +} + +/* + * IOMMU drivers can indicate a device is managed by a given IOMMU using + * this interface. A link to the device will be created in the "devices" + * directory of the IOMMU device in sysfs and an "iommu" link will be + * created under the linked device, pointing back at the IOMMU device. + */ +int iommu_device_link(struct device *dev, struct device *link) +{ + int ret; + + if (!dev || IS_ERR(dev)) + return -ENODEV; + + ret = sysfs_add_link_to_group(&dev->kobj, "devices", + &link->kobj, dev_name(link)); + if (ret) + return ret; + + ret = sysfs_create_link_nowarn(&link->kobj, &dev->kobj, "iommu"); + if (ret) + sysfs_remove_link_from_group(&dev->kobj, "devices", + dev_name(link)); + + return ret; +} + +void iommu_device_unlink(struct device *dev, struct device *link) +{ + if (!dev || IS_ERR(dev)) + return; + + sysfs_remove_link(&link->kobj, "iommu"); + sysfs_remove_link_from_group(&dev->kobj, "devices", dev_name(link)); +} diff --git a/drivers/iommu/iommu-traces.c b/drivers/iommu/iommu-traces.c new file mode 100644 index 000000000..bf3b317ff --- /dev/null +++ b/drivers/iommu/iommu-traces.c @@ -0,0 +1,27 @@ +/* + * iommu trace points + * + * Copyright (C) 2013 Shuah Khan <shuah.kh@samsung.com> + * + */ + +#include <linux/string.h> +#include <linux/types.h> + +#define CREATE_TRACE_POINTS +#include <trace/events/iommu.h> + +/* iommu_group_event */ +EXPORT_TRACEPOINT_SYMBOL_GPL(add_device_to_group); +EXPORT_TRACEPOINT_SYMBOL_GPL(remove_device_from_group); + +/* iommu_device_event */ +EXPORT_TRACEPOINT_SYMBOL_GPL(attach_device_to_domain); +EXPORT_TRACEPOINT_SYMBOL_GPL(detach_device_from_domain); + +/* iommu_map_unmap */ +EXPORT_TRACEPOINT_SYMBOL_GPL(map); +EXPORT_TRACEPOINT_SYMBOL_GPL(unmap); + +/* iommu_error */ +EXPORT_TRACEPOINT_SYMBOL_GPL(io_page_fault); diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c new file mode 100644 index 000000000..d4f527e56 --- /dev/null +++ b/drivers/iommu/iommu.c @@ -0,0 +1,1275 @@ +/* + * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. + * Author: Joerg Roedel <jroedel@suse.de> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#define pr_fmt(fmt) "%s: " fmt, __func__ + +#include <linux/device.h> +#include <linux/kernel.h> +#include <linux/bug.h> +#include <linux/types.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/errno.h> +#include <linux/iommu.h> +#include <linux/idr.h> +#include <linux/notifier.h> +#include <linux/err.h> +#include <linux/pci.h> +#include <linux/bitops.h> +#include <trace/events/iommu.h> + +static struct kset *iommu_group_kset; +static struct ida iommu_group_ida; +static struct mutex iommu_group_mutex; + +struct iommu_callback_data { + const struct iommu_ops *ops; +}; + +struct iommu_group { + struct kobject kobj; + struct kobject *devices_kobj; + struct list_head devices; + struct mutex mutex; + struct blocking_notifier_head notifier; + void *iommu_data; + void (*iommu_data_release)(void *iommu_data); + char *name; + int id; +}; + +struct iommu_device { + struct list_head list; + struct device *dev; + char *name; +}; + +struct iommu_group_attribute { + struct attribute attr; + ssize_t (*show)(struct iommu_group *group, char *buf); + ssize_t (*store)(struct iommu_group *group, + const char *buf, size_t count); +}; + +#define IOMMU_GROUP_ATTR(_name, _mode, _show, _store) \ +struct iommu_group_attribute iommu_group_attr_##_name = \ + __ATTR(_name, _mode, _show, _store) + +#define to_iommu_group_attr(_attr) \ + container_of(_attr, struct iommu_group_attribute, attr) +#define to_iommu_group(_kobj) \ + container_of(_kobj, struct iommu_group, kobj) + +static ssize_t iommu_group_attr_show(struct kobject *kobj, + struct attribute *__attr, char *buf) +{ + struct iommu_group_attribute *attr = to_iommu_group_attr(__attr); + struct iommu_group *group = to_iommu_group(kobj); + ssize_t ret = -EIO; + + if (attr->show) + ret = attr->show(group, buf); + return ret; +} + +static ssize_t iommu_group_attr_store(struct kobject *kobj, + struct attribute *__attr, + const char *buf, size_t count) +{ + struct iommu_group_attribute *attr = to_iommu_group_attr(__attr); + struct iommu_group *group = to_iommu_group(kobj); + ssize_t ret = -EIO; + + if (attr->store) + ret = attr->store(group, buf, count); + return ret; +} + +static const struct sysfs_ops iommu_group_sysfs_ops = { + .show = iommu_group_attr_show, + .store = iommu_group_attr_store, +}; + +static int iommu_group_create_file(struct iommu_group *group, + struct iommu_group_attribute *attr) +{ + return sysfs_create_file(&group->kobj, &attr->attr); +} + +static void iommu_group_remove_file(struct iommu_group *group, + struct iommu_group_attribute *attr) +{ + sysfs_remove_file(&group->kobj, &attr->attr); +} + +static ssize_t iommu_group_show_name(struct iommu_group *group, char *buf) +{ + return sprintf(buf, "%s\n", group->name); +} + +static IOMMU_GROUP_ATTR(name, S_IRUGO, iommu_group_show_name, NULL); + +static void iommu_group_release(struct kobject *kobj) +{ + struct iommu_group *group = to_iommu_group(kobj); + + if (group->iommu_data_release) + group->iommu_data_release(group->iommu_data); + + mutex_lock(&iommu_group_mutex); + ida_remove(&iommu_group_ida, group->id); + mutex_unlock(&iommu_group_mutex); + + kfree(group->name); + kfree(group); +} + +static struct kobj_type iommu_group_ktype = { + .sysfs_ops = &iommu_group_sysfs_ops, + .release = iommu_group_release, +}; + +/** + * iommu_group_alloc - Allocate a new group + * @name: Optional name to associate with group, visible in sysfs + * + * This function is called by an iommu driver to allocate a new iommu + * group. The iommu group represents the minimum granularity of the iommu. + * Upon successful return, the caller holds a reference to the supplied + * group in order to hold the group until devices are added. Use + * iommu_group_put() to release this extra reference count, allowing the + * group to be automatically reclaimed once it has no devices or external + * references. + */ +struct iommu_group *iommu_group_alloc(void) +{ + struct iommu_group *group; + int ret; + + group = kzalloc(sizeof(*group), GFP_KERNEL); + if (!group) + return ERR_PTR(-ENOMEM); + + group->kobj.kset = iommu_group_kset; + mutex_init(&group->mutex); + INIT_LIST_HEAD(&group->devices); + BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier); + + mutex_lock(&iommu_group_mutex); + +again: + if (unlikely(0 == ida_pre_get(&iommu_group_ida, GFP_KERNEL))) { + kfree(group); + mutex_unlock(&iommu_group_mutex); + return ERR_PTR(-ENOMEM); + } + + if (-EAGAIN == ida_get_new(&iommu_group_ida, &group->id)) + goto again; + + mutex_unlock(&iommu_group_mutex); + + ret = kobject_init_and_add(&group->kobj, &iommu_group_ktype, + NULL, "%d", group->id); + if (ret) { + mutex_lock(&iommu_group_mutex); + ida_remove(&iommu_group_ida, group->id); + mutex_unlock(&iommu_group_mutex); + kfree(group); + return ERR_PTR(ret); + } + + group->devices_kobj = kobject_create_and_add("devices", &group->kobj); + if (!group->devices_kobj) { + kobject_put(&group->kobj); /* triggers .release & free */ + return ERR_PTR(-ENOMEM); + } + + /* + * The devices_kobj holds a reference on the group kobject, so + * as long as that exists so will the group. We can therefore + * use the devices_kobj for reference counting. + */ + kobject_put(&group->kobj); + + return group; +} +EXPORT_SYMBOL_GPL(iommu_group_alloc); + +struct iommu_group *iommu_group_get_by_id(int id) +{ + struct kobject *group_kobj; + struct iommu_group *group; + const char *name; + + if (!iommu_group_kset) + return NULL; + + name = kasprintf(GFP_KERNEL, "%d", id); + if (!name) + return NULL; + + group_kobj = kset_find_obj(iommu_group_kset, name); + kfree(name); + + if (!group_kobj) + return NULL; + + group = container_of(group_kobj, struct iommu_group, kobj); + BUG_ON(group->id != id); + + kobject_get(group->devices_kobj); + kobject_put(&group->kobj); + + return group; +} +EXPORT_SYMBOL_GPL(iommu_group_get_by_id); + +/** + * iommu_group_get_iommudata - retrieve iommu_data registered for a group + * @group: the group + * + * iommu drivers can store data in the group for use when doing iommu + * operations. This function provides a way to retrieve it. Caller + * should hold a group reference. + */ +void *iommu_group_get_iommudata(struct iommu_group *group) +{ + return group->iommu_data; +} +EXPORT_SYMBOL_GPL(iommu_group_get_iommudata); + +/** + * iommu_group_set_iommudata - set iommu_data for a group + * @group: the group + * @iommu_data: new data + * @release: release function for iommu_data + * + * iommu drivers can store data in the group for use when doing iommu + * operations. This function provides a way to set the data after + * the group has been allocated. Caller should hold a group reference. + */ +void iommu_group_set_iommudata(struct iommu_group *group, void *iommu_data, + void (*release)(void *iommu_data)) +{ + group->iommu_data = iommu_data; + group->iommu_data_release = release; +} +EXPORT_SYMBOL_GPL(iommu_group_set_iommudata); + +/** + * iommu_group_set_name - set name for a group + * @group: the group + * @name: name + * + * Allow iommu driver to set a name for a group. When set it will + * appear in a name attribute file under the group in sysfs. + */ +int iommu_group_set_name(struct iommu_group *group, const char *name) +{ + int ret; + + if (group->name) { + iommu_group_remove_file(group, &iommu_group_attr_name); + kfree(group->name); + group->name = NULL; + if (!name) + return 0; + } + + group->name = kstrdup(name, GFP_KERNEL); + if (!group->name) + return -ENOMEM; + + ret = iommu_group_create_file(group, &iommu_group_attr_name); + if (ret) { + kfree(group->name); + group->name = NULL; + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(iommu_group_set_name); + +/** + * iommu_group_add_device - add a device to an iommu group + * @group: the group into which to add the device (reference should be held) + * @dev: the device + * + * This function is called by an iommu driver to add a device into a + * group. Adding a device increments the group reference count. + */ +int iommu_group_add_device(struct iommu_group *group, struct device *dev) +{ + int ret, i = 0; + struct iommu_device *device; + + device = kzalloc(sizeof(*device), GFP_KERNEL); + if (!device) + return -ENOMEM; + + device->dev = dev; + + ret = sysfs_create_link(&dev->kobj, &group->kobj, "iommu_group"); + if (ret) { + kfree(device); + return ret; + } + + device->name = kasprintf(GFP_KERNEL, "%s", kobject_name(&dev->kobj)); +rename: + if (!device->name) { + sysfs_remove_link(&dev->kobj, "iommu_group"); + kfree(device); + return -ENOMEM; + } + + ret = sysfs_create_link_nowarn(group->devices_kobj, + &dev->kobj, device->name); + if (ret) { + kfree(device->name); + if (ret == -EEXIST && i >= 0) { + /* + * Account for the slim chance of collision + * and append an instance to the name. + */ + device->name = kasprintf(GFP_KERNEL, "%s.%d", + kobject_name(&dev->kobj), i++); + goto rename; + } + + sysfs_remove_link(&dev->kobj, "iommu_group"); + kfree(device); + return ret; + } + + kobject_get(group->devices_kobj); + + dev->iommu_group = group; + + mutex_lock(&group->mutex); + list_add_tail(&device->list, &group->devices); + mutex_unlock(&group->mutex); + + /* Notify any listeners about change to group. */ + blocking_notifier_call_chain(&group->notifier, + IOMMU_GROUP_NOTIFY_ADD_DEVICE, dev); + + trace_add_device_to_group(group->id, dev); + return 0; +} +EXPORT_SYMBOL_GPL(iommu_group_add_device); + +/** + * iommu_group_remove_device - remove a device from it's current group + * @dev: device to be removed + * + * This function is called by an iommu driver to remove the device from + * it's current group. This decrements the iommu group reference count. + */ +void iommu_group_remove_device(struct device *dev) +{ + struct iommu_group *group = dev->iommu_group; + struct iommu_device *tmp_device, *device = NULL; + + /* Pre-notify listeners that a device is being removed. */ + blocking_notifier_call_chain(&group->notifier, + IOMMU_GROUP_NOTIFY_DEL_DEVICE, dev); + + mutex_lock(&group->mutex); + list_for_each_entry(tmp_device, &group->devices, list) { + if (tmp_device->dev == dev) { + device = tmp_device; + list_del(&device->list); + break; + } + } + mutex_unlock(&group->mutex); + + if (!device) + return; + + sysfs_remove_link(group->devices_kobj, device->name); + sysfs_remove_link(&dev->kobj, "iommu_group"); + + trace_remove_device_from_group(group->id, dev); + + kfree(device->name); + kfree(device); + dev->iommu_group = NULL; + kobject_put(group->devices_kobj); +} +EXPORT_SYMBOL_GPL(iommu_group_remove_device); + +/** + * iommu_group_for_each_dev - iterate over each device in the group + * @group: the group + * @data: caller opaque data to be passed to callback function + * @fn: caller supplied callback function + * + * This function is called by group users to iterate over group devices. + * Callers should hold a reference count to the group during callback. + * The group->mutex is held across callbacks, which will block calls to + * iommu_group_add/remove_device. + */ +int iommu_group_for_each_dev(struct iommu_group *group, void *data, + int (*fn)(struct device *, void *)) +{ + struct iommu_device *device; + int ret = 0; + + mutex_lock(&group->mutex); + list_for_each_entry(device, &group->devices, list) { + ret = fn(device->dev, data); + if (ret) + break; + } + mutex_unlock(&group->mutex); + return ret; +} +EXPORT_SYMBOL_GPL(iommu_group_for_each_dev); + +/** + * iommu_group_get - Return the group for a device and increment reference + * @dev: get the group that this device belongs to + * + * This function is called by iommu drivers and users to get the group + * for the specified device. If found, the group is returned and the group + * reference in incremented, else NULL. + */ +struct iommu_group *iommu_group_get(struct device *dev) +{ + struct iommu_group *group = dev->iommu_group; + + if (group) + kobject_get(group->devices_kobj); + + return group; +} +EXPORT_SYMBOL_GPL(iommu_group_get); + +/** + * iommu_group_put - Decrement group reference + * @group: the group to use + * + * This function is called by iommu drivers and users to release the + * iommu group. Once the reference count is zero, the group is released. + */ +void iommu_group_put(struct iommu_group *group) +{ + if (group) + kobject_put(group->devices_kobj); +} +EXPORT_SYMBOL_GPL(iommu_group_put); + +/** + * iommu_group_register_notifier - Register a notifier for group changes + * @group: the group to watch + * @nb: notifier block to signal + * + * This function allows iommu group users to track changes in a group. + * See include/linux/iommu.h for actions sent via this notifier. Caller + * should hold a reference to the group throughout notifier registration. + */ +int iommu_group_register_notifier(struct iommu_group *group, + struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&group->notifier, nb); +} +EXPORT_SYMBOL_GPL(iommu_group_register_notifier); + +/** + * iommu_group_unregister_notifier - Unregister a notifier + * @group: the group to watch + * @nb: notifier block to signal + * + * Unregister a previously registered group notifier block. + */ +int iommu_group_unregister_notifier(struct iommu_group *group, + struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&group->notifier, nb); +} +EXPORT_SYMBOL_GPL(iommu_group_unregister_notifier); + +/** + * iommu_group_id - Return ID for a group + * @group: the group to ID + * + * Return the unique ID for the group matching the sysfs group number. + */ +int iommu_group_id(struct iommu_group *group) +{ + return group->id; +} +EXPORT_SYMBOL_GPL(iommu_group_id); + +static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev, + unsigned long *devfns); + +/* + * To consider a PCI device isolated, we require ACS to support Source + * Validation, Request Redirection, Completer Redirection, and Upstream + * Forwarding. This effectively means that devices cannot spoof their + * requester ID, requests and completions cannot be redirected, and all + * transactions are forwarded upstream, even as it passes through a + * bridge where the target device is downstream. + */ +#define REQ_ACS_FLAGS (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF) + +/* + * For multifunction devices which are not isolated from each other, find + * all the other non-isolated functions and look for existing groups. For + * each function, we also need to look for aliases to or from other devices + * that may already have a group. + */ +static struct iommu_group *get_pci_function_alias_group(struct pci_dev *pdev, + unsigned long *devfns) +{ + struct pci_dev *tmp = NULL; + struct iommu_group *group; + + if (!pdev->multifunction || pci_acs_enabled(pdev, REQ_ACS_FLAGS)) + return NULL; + + for_each_pci_dev(tmp) { + if (tmp == pdev || tmp->bus != pdev->bus || + PCI_SLOT(tmp->devfn) != PCI_SLOT(pdev->devfn) || + pci_acs_enabled(tmp, REQ_ACS_FLAGS)) + continue; + + group = get_pci_alias_group(tmp, devfns); + if (group) { + pci_dev_put(tmp); + return group; + } + } + + return NULL; +} + +/* + * Look for aliases to or from the given device for exisiting groups. The + * dma_alias_devfn only supports aliases on the same bus, therefore the search + * space is quite small (especially since we're really only looking at pcie + * device, and therefore only expect multiple slots on the root complex or + * downstream switch ports). It's conceivable though that a pair of + * multifunction devices could have aliases between them that would cause a + * loop. To prevent this, we use a bitmap to track where we've been. + */ +static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev, + unsigned long *devfns) +{ + struct pci_dev *tmp = NULL; + struct iommu_group *group; + + if (test_and_set_bit(pdev->devfn & 0xff, devfns)) + return NULL; + + group = iommu_group_get(&pdev->dev); + if (group) + return group; + + for_each_pci_dev(tmp) { + if (tmp == pdev || tmp->bus != pdev->bus) + continue; + + /* We alias them or they alias us */ + if (((pdev->dev_flags & PCI_DEV_FLAGS_DMA_ALIAS_DEVFN) && + pdev->dma_alias_devfn == tmp->devfn) || + ((tmp->dev_flags & PCI_DEV_FLAGS_DMA_ALIAS_DEVFN) && + tmp->dma_alias_devfn == pdev->devfn)) { + + group = get_pci_alias_group(tmp, devfns); + if (group) { + pci_dev_put(tmp); + return group; + } + + group = get_pci_function_alias_group(tmp, devfns); + if (group) { + pci_dev_put(tmp); + return group; + } + } + } + + return NULL; +} + +struct group_for_pci_data { + struct pci_dev *pdev; + struct iommu_group *group; +}; + +/* + * DMA alias iterator callback, return the last seen device. Stop and return + * the IOMMU group if we find one along the way. + */ +static int get_pci_alias_or_group(struct pci_dev *pdev, u16 alias, void *opaque) +{ + struct group_for_pci_data *data = opaque; + + data->pdev = pdev; + data->group = iommu_group_get(&pdev->dev); + + return data->group != NULL; +} + +/* + * Use standard PCI bus topology, isolation features, and DMA alias quirks + * to find or create an IOMMU group for a device. + */ +static struct iommu_group *iommu_group_get_for_pci_dev(struct pci_dev *pdev) +{ + struct group_for_pci_data data; + struct pci_bus *bus; + struct iommu_group *group = NULL; + u64 devfns[4] = { 0 }; + + /* + * Find the upstream DMA alias for the device. A device must not + * be aliased due to topology in order to have its own IOMMU group. + * If we find an alias along the way that already belongs to a + * group, use it. + */ + if (pci_for_each_dma_alias(pdev, get_pci_alias_or_group, &data)) + return data.group; + + pdev = data.pdev; + + /* + * Continue upstream from the point of minimum IOMMU granularity + * due to aliases to the point where devices are protected from + * peer-to-peer DMA by PCI ACS. Again, if we find an existing + * group, use it. + */ + for (bus = pdev->bus; !pci_is_root_bus(bus); bus = bus->parent) { + if (!bus->self) + continue; + + if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS)) + break; + + pdev = bus->self; + + group = iommu_group_get(&pdev->dev); + if (group) + return group; + } + + /* + * Look for existing groups on device aliases. If we alias another + * device or another device aliases us, use the same group. + */ + group = get_pci_alias_group(pdev, (unsigned long *)devfns); + if (group) + return group; + + /* + * Look for existing groups on non-isolated functions on the same + * slot and aliases of those funcions, if any. No need to clear + * the search bitmap, the tested devfns are still valid. + */ + group = get_pci_function_alias_group(pdev, (unsigned long *)devfns); + if (group) + return group; + + /* No shared group found, allocate new */ + return iommu_group_alloc(); +} + +/** + * iommu_group_get_for_dev - Find or create the IOMMU group for a device + * @dev: target device + * + * This function is intended to be called by IOMMU drivers and extended to + * support common, bus-defined algorithms when determining or creating the + * IOMMU group for a device. On success, the caller will hold a reference + * to the returned IOMMU group, which will already include the provided + * device. The reference should be released with iommu_group_put(). + */ +struct iommu_group *iommu_group_get_for_dev(struct device *dev) +{ + struct iommu_group *group; + int ret; + + group = iommu_group_get(dev); + if (group) + return group; + + if (!dev_is_pci(dev)) + return ERR_PTR(-EINVAL); + + group = iommu_group_get_for_pci_dev(to_pci_dev(dev)); + + if (IS_ERR(group)) + return group; + + ret = iommu_group_add_device(group, dev); + if (ret) { + iommu_group_put(group); + return ERR_PTR(ret); + } + + return group; +} + +static int add_iommu_group(struct device *dev, void *data) +{ + struct iommu_callback_data *cb = data; + const struct iommu_ops *ops = cb->ops; + + if (!ops->add_device) + return 0; + + WARN_ON(dev->iommu_group); + + ops->add_device(dev); + + return 0; +} + +static int iommu_bus_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct device *dev = data; + const struct iommu_ops *ops = dev->bus->iommu_ops; + struct iommu_group *group; + unsigned long group_action = 0; + + /* + * ADD/DEL call into iommu driver ops if provided, which may + * result in ADD/DEL notifiers to group->notifier + */ + if (action == BUS_NOTIFY_ADD_DEVICE) { + if (ops->add_device) + return ops->add_device(dev); + } else if (action == BUS_NOTIFY_DEL_DEVICE) { + if (ops->remove_device && dev->iommu_group) { + ops->remove_device(dev); + return 0; + } + } + + /* + * Remaining BUS_NOTIFYs get filtered and republished to the + * group, if anyone is listening + */ + group = iommu_group_get(dev); + if (!group) + return 0; + + switch (action) { + case BUS_NOTIFY_BIND_DRIVER: + group_action = IOMMU_GROUP_NOTIFY_BIND_DRIVER; + break; + case BUS_NOTIFY_BOUND_DRIVER: + group_action = IOMMU_GROUP_NOTIFY_BOUND_DRIVER; + break; + case BUS_NOTIFY_UNBIND_DRIVER: + group_action = IOMMU_GROUP_NOTIFY_UNBIND_DRIVER; + break; + case BUS_NOTIFY_UNBOUND_DRIVER: + group_action = IOMMU_GROUP_NOTIFY_UNBOUND_DRIVER; + break; + } + + if (group_action) + blocking_notifier_call_chain(&group->notifier, + group_action, dev); + + iommu_group_put(group); + return 0; +} + +static int iommu_bus_init(struct bus_type *bus, const struct iommu_ops *ops) +{ + int err; + struct notifier_block *nb; + struct iommu_callback_data cb = { + .ops = ops, + }; + + nb = kzalloc(sizeof(struct notifier_block), GFP_KERNEL); + if (!nb) + return -ENOMEM; + + nb->notifier_call = iommu_bus_notifier; + + err = bus_register_notifier(bus, nb); + if (err) { + kfree(nb); + return err; + } + + err = bus_for_each_dev(bus, NULL, &cb, add_iommu_group); + if (err) { + bus_unregister_notifier(bus, nb); + kfree(nb); + return err; + } + + return 0; +} + +/** + * bus_set_iommu - set iommu-callbacks for the bus + * @bus: bus. + * @ops: the callbacks provided by the iommu-driver + * + * This function is called by an iommu driver to set the iommu methods + * used for a particular bus. Drivers for devices on that bus can use + * the iommu-api after these ops are registered. + * This special function is needed because IOMMUs are usually devices on + * the bus itself, so the iommu drivers are not initialized when the bus + * is set up. With this function the iommu-driver can set the iommu-ops + * afterwards. + */ +int bus_set_iommu(struct bus_type *bus, const struct iommu_ops *ops) +{ + int err; + + if (bus->iommu_ops != NULL) + return -EBUSY; + + bus->iommu_ops = ops; + + /* Do IOMMU specific setup for this bus-type */ + err = iommu_bus_init(bus, ops); + if (err) + bus->iommu_ops = NULL; + + return err; +} +EXPORT_SYMBOL_GPL(bus_set_iommu); + +bool iommu_present(struct bus_type *bus) +{ + return bus->iommu_ops != NULL; +} +EXPORT_SYMBOL_GPL(iommu_present); + +bool iommu_capable(struct bus_type *bus, enum iommu_cap cap) +{ + if (!bus->iommu_ops || !bus->iommu_ops->capable) + return false; + + return bus->iommu_ops->capable(cap); +} +EXPORT_SYMBOL_GPL(iommu_capable); + +/** + * iommu_set_fault_handler() - set a fault handler for an iommu domain + * @domain: iommu domain + * @handler: fault handler + * @token: user data, will be passed back to the fault handler + * + * This function should be used by IOMMU users which want to be notified + * whenever an IOMMU fault happens. + * + * The fault handler itself should return 0 on success, and an appropriate + * error code otherwise. + */ +void iommu_set_fault_handler(struct iommu_domain *domain, + iommu_fault_handler_t handler, + void *token) +{ + BUG_ON(!domain); + + domain->handler = handler; + domain->handler_token = token; +} +EXPORT_SYMBOL_GPL(iommu_set_fault_handler); + +struct iommu_domain *iommu_domain_alloc(struct bus_type *bus) +{ + struct iommu_domain *domain; + + if (bus == NULL || bus->iommu_ops == NULL) + return NULL; + + domain = bus->iommu_ops->domain_alloc(IOMMU_DOMAIN_UNMANAGED); + if (!domain) + return NULL; + + domain->ops = bus->iommu_ops; + domain->type = IOMMU_DOMAIN_UNMANAGED; + + return domain; +} +EXPORT_SYMBOL_GPL(iommu_domain_alloc); + +void iommu_domain_free(struct iommu_domain *domain) +{ + domain->ops->domain_free(domain); +} +EXPORT_SYMBOL_GPL(iommu_domain_free); + +int iommu_attach_device(struct iommu_domain *domain, struct device *dev) +{ + int ret; + if (unlikely(domain->ops->attach_dev == NULL)) + return -ENODEV; + + ret = domain->ops->attach_dev(domain, dev); + if (!ret) + trace_attach_device_to_domain(dev); + return ret; +} +EXPORT_SYMBOL_GPL(iommu_attach_device); + +void iommu_detach_device(struct iommu_domain *domain, struct device *dev) +{ + if (unlikely(domain->ops->detach_dev == NULL)) + return; + + domain->ops->detach_dev(domain, dev); + trace_detach_device_from_domain(dev); +} +EXPORT_SYMBOL_GPL(iommu_detach_device); + +/* + * IOMMU groups are really the natrual working unit of the IOMMU, but + * the IOMMU API works on domains and devices. Bridge that gap by + * iterating over the devices in a group. Ideally we'd have a single + * device which represents the requestor ID of the group, but we also + * allow IOMMU drivers to create policy defined minimum sets, where + * the physical hardware may be able to distiguish members, but we + * wish to group them at a higher level (ex. untrusted multi-function + * PCI devices). Thus we attach each device. + */ +static int iommu_group_do_attach_device(struct device *dev, void *data) +{ + struct iommu_domain *domain = data; + + return iommu_attach_device(domain, dev); +} + +int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group) +{ + return iommu_group_for_each_dev(group, domain, + iommu_group_do_attach_device); +} +EXPORT_SYMBOL_GPL(iommu_attach_group); + +static int iommu_group_do_detach_device(struct device *dev, void *data) +{ + struct iommu_domain *domain = data; + + iommu_detach_device(domain, dev); + + return 0; +} + +void iommu_detach_group(struct iommu_domain *domain, struct iommu_group *group) +{ + iommu_group_for_each_dev(group, domain, iommu_group_do_detach_device); +} +EXPORT_SYMBOL_GPL(iommu_detach_group); + +phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) +{ + if (unlikely(domain->ops->iova_to_phys == NULL)) + return 0; + + return domain->ops->iova_to_phys(domain, iova); +} +EXPORT_SYMBOL_GPL(iommu_iova_to_phys); + +static size_t iommu_pgsize(struct iommu_domain *domain, + unsigned long addr_merge, size_t size) +{ + unsigned int pgsize_idx; + size_t pgsize; + + /* Max page size that still fits into 'size' */ + pgsize_idx = __fls(size); + + /* need to consider alignment requirements ? */ + if (likely(addr_merge)) { + /* Max page size allowed by address */ + unsigned int align_pgsize_idx = __ffs(addr_merge); + pgsize_idx = min(pgsize_idx, align_pgsize_idx); + } + + /* build a mask of acceptable page sizes */ + pgsize = (1UL << (pgsize_idx + 1)) - 1; + + /* throw away page sizes not supported by the hardware */ + pgsize &= domain->ops->pgsize_bitmap; + + /* make sure we're still sane */ + BUG_ON(!pgsize); + + /* pick the biggest page */ + pgsize_idx = __fls(pgsize); + pgsize = 1UL << pgsize_idx; + + return pgsize; +} + +int iommu_map(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot) +{ + unsigned long orig_iova = iova; + unsigned int min_pagesz; + size_t orig_size = size; + int ret = 0; + + if (unlikely(domain->ops->map == NULL || + domain->ops->pgsize_bitmap == 0UL)) + return -ENODEV; + + if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING))) + return -EINVAL; + + /* find out the minimum page size supported */ + min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap); + + /* + * both the virtual address and the physical one, as well as + * the size of the mapping, must be aligned (at least) to the + * size of the smallest page supported by the hardware + */ + if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) { + pr_err("unaligned: iova 0x%lx pa %pa size 0x%zx min_pagesz 0x%x\n", + iova, &paddr, size, min_pagesz); + return -EINVAL; + } + + pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size); + + while (size) { + size_t pgsize = iommu_pgsize(domain, iova | paddr, size); + + pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx\n", + iova, &paddr, pgsize); + + ret = domain->ops->map(domain, iova, paddr, pgsize, prot); + if (ret) + break; + + iova += pgsize; + paddr += pgsize; + size -= pgsize; + } + + /* unroll mapping in case something went wrong */ + if (ret) + iommu_unmap(domain, orig_iova, orig_size - size); + else + trace_map(orig_iova, paddr, orig_size); + + return ret; +} +EXPORT_SYMBOL_GPL(iommu_map); + +size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) +{ + size_t unmapped_page, unmapped = 0; + unsigned int min_pagesz; + unsigned long orig_iova = iova; + + if (unlikely(domain->ops->unmap == NULL || + domain->ops->pgsize_bitmap == 0UL)) + return -ENODEV; + + if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING))) + return -EINVAL; + + /* find out the minimum page size supported */ + min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap); + + /* + * The virtual address, as well as the size of the mapping, must be + * aligned (at least) to the size of the smallest page supported + * by the hardware + */ + if (!IS_ALIGNED(iova | size, min_pagesz)) { + pr_err("unaligned: iova 0x%lx size 0x%zx min_pagesz 0x%x\n", + iova, size, min_pagesz); + return -EINVAL; + } + + pr_debug("unmap this: iova 0x%lx size 0x%zx\n", iova, size); + + /* + * Keep iterating until we either unmap 'size' bytes (or more) + * or we hit an area that isn't mapped. + */ + while (unmapped < size) { + size_t pgsize = iommu_pgsize(domain, iova, size - unmapped); + + unmapped_page = domain->ops->unmap(domain, iova, pgsize); + if (!unmapped_page) + break; + + pr_debug("unmapped: iova 0x%lx size 0x%zx\n", + iova, unmapped_page); + + iova += unmapped_page; + unmapped += unmapped_page; + } + + trace_unmap(orig_iova, size, unmapped); + return unmapped; +} +EXPORT_SYMBOL_GPL(iommu_unmap); + +size_t default_iommu_map_sg(struct iommu_domain *domain, unsigned long iova, + struct scatterlist *sg, unsigned int nents, int prot) +{ + struct scatterlist *s; + size_t mapped = 0; + unsigned int i, min_pagesz; + int ret; + + if (unlikely(domain->ops->pgsize_bitmap == 0UL)) + return 0; + + min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap); + + for_each_sg(sg, s, nents, i) { + phys_addr_t phys = page_to_phys(sg_page(s)) + s->offset; + + /* + * We are mapping on IOMMU page boundaries, so offset within + * the page must be 0. However, the IOMMU may support pages + * smaller than PAGE_SIZE, so s->offset may still represent + * an offset of that boundary within the CPU page. + */ + if (!IS_ALIGNED(s->offset, min_pagesz)) + goto out_err; + + ret = iommu_map(domain, iova + mapped, phys, s->length, prot); + if (ret) + goto out_err; + + mapped += s->length; + } + + return mapped; + +out_err: + /* undo mappings already done */ + iommu_unmap(domain, iova, mapped); + + return 0; + +} +EXPORT_SYMBOL_GPL(default_iommu_map_sg); + +int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr, + phys_addr_t paddr, u64 size, int prot) +{ + if (unlikely(domain->ops->domain_window_enable == NULL)) + return -ENODEV; + + return domain->ops->domain_window_enable(domain, wnd_nr, paddr, size, + prot); +} +EXPORT_SYMBOL_GPL(iommu_domain_window_enable); + +void iommu_domain_window_disable(struct iommu_domain *domain, u32 wnd_nr) +{ + if (unlikely(domain->ops->domain_window_disable == NULL)) + return; + + return domain->ops->domain_window_disable(domain, wnd_nr); +} +EXPORT_SYMBOL_GPL(iommu_domain_window_disable); + +static int __init iommu_init(void) +{ + iommu_group_kset = kset_create_and_add("iommu_groups", + NULL, kernel_kobj); + ida_init(&iommu_group_ida); + mutex_init(&iommu_group_mutex); + + BUG_ON(!iommu_group_kset); + + return 0; +} +arch_initcall(iommu_init); + +int iommu_domain_get_attr(struct iommu_domain *domain, + enum iommu_attr attr, void *data) +{ + struct iommu_domain_geometry *geometry; + bool *paging; + int ret = 0; + u32 *count; + + switch (attr) { + case DOMAIN_ATTR_GEOMETRY: + geometry = data; + *geometry = domain->geometry; + + break; + case DOMAIN_ATTR_PAGING: + paging = data; + *paging = (domain->ops->pgsize_bitmap != 0UL); + break; + case DOMAIN_ATTR_WINDOWS: + count = data; + + if (domain->ops->domain_get_windows != NULL) + *count = domain->ops->domain_get_windows(domain); + else + ret = -ENODEV; + + break; + default: + if (!domain->ops->domain_get_attr) + return -EINVAL; + + ret = domain->ops->domain_get_attr(domain, attr, data); + } + + return ret; +} +EXPORT_SYMBOL_GPL(iommu_domain_get_attr); + +int iommu_domain_set_attr(struct iommu_domain *domain, + enum iommu_attr attr, void *data) +{ + int ret = 0; + u32 *count; + + switch (attr) { + case DOMAIN_ATTR_WINDOWS: + count = data; + + if (domain->ops->domain_set_windows != NULL) + ret = domain->ops->domain_set_windows(domain, *count); + else + ret = -ENODEV; + + break; + default: + if (domain->ops->domain_set_attr == NULL) + return -EINVAL; + + ret = domain->ops->domain_set_attr(domain, attr, data); + } + + return ret; +} +EXPORT_SYMBOL_GPL(iommu_domain_set_attr); diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c new file mode 100644 index 000000000..9dd820831 --- /dev/null +++ b/drivers/iommu/iova.c @@ -0,0 +1,532 @@ +/* + * Copyright © 2006-2009, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> + */ + +#include <linux/iova.h> +#include <linux/slab.h> + +static struct kmem_cache *iommu_iova_cache; + +int iommu_iova_cache_init(void) +{ + int ret = 0; + + iommu_iova_cache = kmem_cache_create("iommu_iova", + sizeof(struct iova), + 0, + SLAB_HWCACHE_ALIGN, + NULL); + if (!iommu_iova_cache) { + pr_err("Couldn't create iova cache\n"); + ret = -ENOMEM; + } + + return ret; +} + +void iommu_iova_cache_destroy(void) +{ + kmem_cache_destroy(iommu_iova_cache); +} + +struct iova *alloc_iova_mem(void) +{ + return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC); +} + +void free_iova_mem(struct iova *iova) +{ + kmem_cache_free(iommu_iova_cache, iova); +} + +void +init_iova_domain(struct iova_domain *iovad, unsigned long granule, + unsigned long start_pfn, unsigned long pfn_32bit) +{ + /* + * IOVA granularity will normally be equal to the smallest + * supported IOMMU page size; both *must* be capable of + * representing individual CPU pages exactly. + */ + BUG_ON((granule > PAGE_SIZE) || !is_power_of_2(granule)); + + spin_lock_init(&iovad->iova_rbtree_lock); + iovad->rbroot = RB_ROOT; + iovad->cached32_node = NULL; + iovad->granule = granule; + iovad->start_pfn = start_pfn; + iovad->dma_32bit_pfn = pfn_32bit; +} + +static struct rb_node * +__get_cached_rbnode(struct iova_domain *iovad, unsigned long *limit_pfn) +{ + if ((*limit_pfn != iovad->dma_32bit_pfn) || + (iovad->cached32_node == NULL)) + return rb_last(&iovad->rbroot); + else { + struct rb_node *prev_node = rb_prev(iovad->cached32_node); + struct iova *curr_iova = + container_of(iovad->cached32_node, struct iova, node); + *limit_pfn = curr_iova->pfn_lo - 1; + return prev_node; + } +} + +static void +__cached_rbnode_insert_update(struct iova_domain *iovad, + unsigned long limit_pfn, struct iova *new) +{ + if (limit_pfn != iovad->dma_32bit_pfn) + return; + iovad->cached32_node = &new->node; +} + +static void +__cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free) +{ + struct iova *cached_iova; + struct rb_node *curr; + + if (!iovad->cached32_node) + return; + curr = iovad->cached32_node; + cached_iova = container_of(curr, struct iova, node); + + if (free->pfn_lo >= cached_iova->pfn_lo) { + struct rb_node *node = rb_next(&free->node); + struct iova *iova = container_of(node, struct iova, node); + + /* only cache if it's below 32bit pfn */ + if (node && iova->pfn_lo < iovad->dma_32bit_pfn) + iovad->cached32_node = node; + else + iovad->cached32_node = NULL; + } +} + +/* Computes the padding size required, to make the + * the start address naturally aligned on its size + */ +static int +iova_get_pad_size(int size, unsigned int limit_pfn) +{ + unsigned int pad_size = 0; + unsigned int order = ilog2(size); + + if (order) + pad_size = (limit_pfn + 1) % (1 << order); + + return pad_size; +} + +static int __alloc_and_insert_iova_range(struct iova_domain *iovad, + unsigned long size, unsigned long limit_pfn, + struct iova *new, bool size_aligned) +{ + struct rb_node *prev, *curr = NULL; + unsigned long flags; + unsigned long saved_pfn; + unsigned int pad_size = 0; + + /* Walk the tree backwards */ + spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); + saved_pfn = limit_pfn; + curr = __get_cached_rbnode(iovad, &limit_pfn); + prev = curr; + while (curr) { + struct iova *curr_iova = container_of(curr, struct iova, node); + + if (limit_pfn < curr_iova->pfn_lo) + goto move_left; + else if (limit_pfn < curr_iova->pfn_hi) + goto adjust_limit_pfn; + else { + if (size_aligned) + pad_size = iova_get_pad_size(size, limit_pfn); + if ((curr_iova->pfn_hi + size + pad_size) <= limit_pfn) + break; /* found a free slot */ + } +adjust_limit_pfn: + limit_pfn = curr_iova->pfn_lo - 1; +move_left: + prev = curr; + curr = rb_prev(curr); + } + + if (!curr) { + if (size_aligned) + pad_size = iova_get_pad_size(size, limit_pfn); + if ((iovad->start_pfn + size + pad_size) > limit_pfn) { + spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); + return -ENOMEM; + } + } + + /* pfn_lo will point to size aligned address if size_aligned is set */ + new->pfn_lo = limit_pfn - (size + pad_size) + 1; + new->pfn_hi = new->pfn_lo + size - 1; + + /* Insert the new_iova into domain rbtree by holding writer lock */ + /* Add new node and rebalance tree. */ + { + struct rb_node **entry, *parent = NULL; + + /* If we have 'prev', it's a valid place to start the + insertion. Otherwise, start from the root. */ + if (prev) + entry = &prev; + else + entry = &iovad->rbroot.rb_node; + + /* Figure out where to put new node */ + while (*entry) { + struct iova *this = container_of(*entry, + struct iova, node); + parent = *entry; + + if (new->pfn_lo < this->pfn_lo) + entry = &((*entry)->rb_left); + else if (new->pfn_lo > this->pfn_lo) + entry = &((*entry)->rb_right); + else + BUG(); /* this should not happen */ + } + + /* Add new node and rebalance tree. */ + rb_link_node(&new->node, parent, entry); + rb_insert_color(&new->node, &iovad->rbroot); + } + __cached_rbnode_insert_update(iovad, saved_pfn, new); + + spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); + + + return 0; +} + +static void +iova_insert_rbtree(struct rb_root *root, struct iova *iova) +{ + struct rb_node **new = &(root->rb_node), *parent = NULL; + /* Figure out where to put new node */ + while (*new) { + struct iova *this = container_of(*new, struct iova, node); + parent = *new; + + if (iova->pfn_lo < this->pfn_lo) + new = &((*new)->rb_left); + else if (iova->pfn_lo > this->pfn_lo) + new = &((*new)->rb_right); + else + BUG(); /* this should not happen */ + } + /* Add new node and rebalance tree. */ + rb_link_node(&iova->node, parent, new); + rb_insert_color(&iova->node, root); +} + +/** + * alloc_iova - allocates an iova + * @iovad: - iova domain in question + * @size: - size of page frames to allocate + * @limit_pfn: - max limit address + * @size_aligned: - set if size_aligned address range is required + * This function allocates an iova in the range iovad->start_pfn to limit_pfn, + * searching top-down from limit_pfn to iovad->start_pfn. If the size_aligned + * flag is set then the allocated address iova->pfn_lo will be naturally + * aligned on roundup_power_of_two(size). + */ +struct iova * +alloc_iova(struct iova_domain *iovad, unsigned long size, + unsigned long limit_pfn, + bool size_aligned) +{ + struct iova *new_iova; + int ret; + + new_iova = alloc_iova_mem(); + if (!new_iova) + return NULL; + + /* If size aligned is set then round the size to + * to next power of two. + */ + if (size_aligned) + size = __roundup_pow_of_two(size); + + ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn, + new_iova, size_aligned); + + if (ret) { + free_iova_mem(new_iova); + return NULL; + } + + return new_iova; +} + +/** + * find_iova - find's an iova for a given pfn + * @iovad: - iova domain in question. + * @pfn: - page frame number + * This function finds and returns an iova belonging to the + * given doamin which matches the given pfn. + */ +struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn) +{ + unsigned long flags; + struct rb_node *node; + + /* Take the lock so that no other thread is manipulating the rbtree */ + spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); + node = iovad->rbroot.rb_node; + while (node) { + struct iova *iova = container_of(node, struct iova, node); + + /* If pfn falls within iova's range, return iova */ + if ((pfn >= iova->pfn_lo) && (pfn <= iova->pfn_hi)) { + spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); + /* We are not holding the lock while this iova + * is referenced by the caller as the same thread + * which called this function also calls __free_iova() + * and it is by design that only one thread can possibly + * reference a particular iova and hence no conflict. + */ + return iova; + } + + if (pfn < iova->pfn_lo) + node = node->rb_left; + else if (pfn > iova->pfn_lo) + node = node->rb_right; + } + + spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); + return NULL; +} + +/** + * __free_iova - frees the given iova + * @iovad: iova domain in question. + * @iova: iova in question. + * Frees the given iova belonging to the giving domain + */ +void +__free_iova(struct iova_domain *iovad, struct iova *iova) +{ + unsigned long flags; + + spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); + __cached_rbnode_delete_update(iovad, iova); + rb_erase(&iova->node, &iovad->rbroot); + spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); + free_iova_mem(iova); +} + +/** + * free_iova - finds and frees the iova for a given pfn + * @iovad: - iova domain in question. + * @pfn: - pfn that is allocated previously + * This functions finds an iova for a given pfn and then + * frees the iova from that domain. + */ +void +free_iova(struct iova_domain *iovad, unsigned long pfn) +{ + struct iova *iova = find_iova(iovad, pfn); + if (iova) + __free_iova(iovad, iova); + +} + +/** + * put_iova_domain - destroys the iova doamin + * @iovad: - iova domain in question. + * All the iova's in that domain are destroyed. + */ +void put_iova_domain(struct iova_domain *iovad) +{ + struct rb_node *node; + unsigned long flags; + + spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); + node = rb_first(&iovad->rbroot); + while (node) { + struct iova *iova = container_of(node, struct iova, node); + rb_erase(node, &iovad->rbroot); + free_iova_mem(iova); + node = rb_first(&iovad->rbroot); + } + spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); +} + +static int +__is_range_overlap(struct rb_node *node, + unsigned long pfn_lo, unsigned long pfn_hi) +{ + struct iova *iova = container_of(node, struct iova, node); + + if ((pfn_lo <= iova->pfn_hi) && (pfn_hi >= iova->pfn_lo)) + return 1; + return 0; +} + +static inline struct iova * +alloc_and_init_iova(unsigned long pfn_lo, unsigned long pfn_hi) +{ + struct iova *iova; + + iova = alloc_iova_mem(); + if (iova) { + iova->pfn_lo = pfn_lo; + iova->pfn_hi = pfn_hi; + } + + return iova; +} + +static struct iova * +__insert_new_range(struct iova_domain *iovad, + unsigned long pfn_lo, unsigned long pfn_hi) +{ + struct iova *iova; + + iova = alloc_and_init_iova(pfn_lo, pfn_hi); + if (iova) + iova_insert_rbtree(&iovad->rbroot, iova); + + return iova; +} + +static void +__adjust_overlap_range(struct iova *iova, + unsigned long *pfn_lo, unsigned long *pfn_hi) +{ + if (*pfn_lo < iova->pfn_lo) + iova->pfn_lo = *pfn_lo; + if (*pfn_hi > iova->pfn_hi) + *pfn_lo = iova->pfn_hi + 1; +} + +/** + * reserve_iova - reserves an iova in the given range + * @iovad: - iova domain pointer + * @pfn_lo: - lower page frame address + * @pfn_hi:- higher pfn adderss + * This function allocates reserves the address range from pfn_lo to pfn_hi so + * that this address is not dished out as part of alloc_iova. + */ +struct iova * +reserve_iova(struct iova_domain *iovad, + unsigned long pfn_lo, unsigned long pfn_hi) +{ + struct rb_node *node; + unsigned long flags; + struct iova *iova; + unsigned int overlap = 0; + + spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); + for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) { + if (__is_range_overlap(node, pfn_lo, pfn_hi)) { + iova = container_of(node, struct iova, node); + __adjust_overlap_range(iova, &pfn_lo, &pfn_hi); + if ((pfn_lo >= iova->pfn_lo) && + (pfn_hi <= iova->pfn_hi)) + goto finish; + overlap = 1; + + } else if (overlap) + break; + } + + /* We are here either because this is the first reserver node + * or need to insert remaining non overlap addr range + */ + iova = __insert_new_range(iovad, pfn_lo, pfn_hi); +finish: + + spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); + return iova; +} + +/** + * copy_reserved_iova - copies the reserved between domains + * @from: - source doamin from where to copy + * @to: - destination domin where to copy + * This function copies reserved iova's from one doamin to + * other. + */ +void +copy_reserved_iova(struct iova_domain *from, struct iova_domain *to) +{ + unsigned long flags; + struct rb_node *node; + + spin_lock_irqsave(&from->iova_rbtree_lock, flags); + for (node = rb_first(&from->rbroot); node; node = rb_next(node)) { + struct iova *iova = container_of(node, struct iova, node); + struct iova *new_iova; + new_iova = reserve_iova(to, iova->pfn_lo, iova->pfn_hi); + if (!new_iova) + printk(KERN_ERR "Reserve iova range %lx@%lx failed\n", + iova->pfn_lo, iova->pfn_lo); + } + spin_unlock_irqrestore(&from->iova_rbtree_lock, flags); +} + +struct iova * +split_and_remove_iova(struct iova_domain *iovad, struct iova *iova, + unsigned long pfn_lo, unsigned long pfn_hi) +{ + unsigned long flags; + struct iova *prev = NULL, *next = NULL; + + spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); + if (iova->pfn_lo < pfn_lo) { + prev = alloc_and_init_iova(iova->pfn_lo, pfn_lo - 1); + if (prev == NULL) + goto error; + } + if (iova->pfn_hi > pfn_hi) { + next = alloc_and_init_iova(pfn_hi + 1, iova->pfn_hi); + if (next == NULL) + goto error; + } + + __cached_rbnode_delete_update(iovad, iova); + rb_erase(&iova->node, &iovad->rbroot); + + if (prev) { + iova_insert_rbtree(&iovad->rbroot, prev); + iova->pfn_lo = pfn_lo; + } + if (next) { + iova_insert_rbtree(&iovad->rbroot, next); + iova->pfn_hi = pfn_hi; + } + spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); + + return iova; + +error: + spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); + if (prev) + free_iova_mem(prev); + return NULL; +} diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c new file mode 100644 index 000000000..1a67c531a --- /dev/null +++ b/drivers/iommu/ipmmu-vmsa.c @@ -0,0 +1,895 @@ +/* + * IPMMU VMSA + * + * Copyright (C) 2014 Renesas Electronics Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + */ + +#include <linux/delay.h> +#include <linux/dma-mapping.h> +#include <linux/err.h> +#include <linux/export.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/iommu.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/sizes.h> +#include <linux/slab.h> + +#include <asm/dma-iommu.h> +#include <asm/pgalloc.h> + +#include "io-pgtable.h" + +struct ipmmu_vmsa_device { + struct device *dev; + void __iomem *base; + struct list_head list; + + unsigned int num_utlbs; + + struct dma_iommu_mapping *mapping; +}; + +struct ipmmu_vmsa_domain { + struct ipmmu_vmsa_device *mmu; + struct iommu_domain io_domain; + + struct io_pgtable_cfg cfg; + struct io_pgtable_ops *iop; + + unsigned int context_id; + spinlock_t lock; /* Protects mappings */ +}; + +struct ipmmu_vmsa_archdata { + struct ipmmu_vmsa_device *mmu; + unsigned int *utlbs; + unsigned int num_utlbs; +}; + +static DEFINE_SPINLOCK(ipmmu_devices_lock); +static LIST_HEAD(ipmmu_devices); + +static struct ipmmu_vmsa_domain *to_vmsa_domain(struct iommu_domain *dom) +{ + return container_of(dom, struct ipmmu_vmsa_domain, io_domain); +} + +#define TLB_LOOP_TIMEOUT 100 /* 100us */ + +/* ----------------------------------------------------------------------------- + * Registers Definition + */ + +#define IM_NS_ALIAS_OFFSET 0x800 + +#define IM_CTX_SIZE 0x40 + +#define IMCTR 0x0000 +#define IMCTR_TRE (1 << 17) +#define IMCTR_AFE (1 << 16) +#define IMCTR_RTSEL_MASK (3 << 4) +#define IMCTR_RTSEL_SHIFT 4 +#define IMCTR_TREN (1 << 3) +#define IMCTR_INTEN (1 << 2) +#define IMCTR_FLUSH (1 << 1) +#define IMCTR_MMUEN (1 << 0) + +#define IMCAAR 0x0004 + +#define IMTTBCR 0x0008 +#define IMTTBCR_EAE (1 << 31) +#define IMTTBCR_PMB (1 << 30) +#define IMTTBCR_SH1_NON_SHAREABLE (0 << 28) +#define IMTTBCR_SH1_OUTER_SHAREABLE (2 << 28) +#define IMTTBCR_SH1_INNER_SHAREABLE (3 << 28) +#define IMTTBCR_SH1_MASK (3 << 28) +#define IMTTBCR_ORGN1_NC (0 << 26) +#define IMTTBCR_ORGN1_WB_WA (1 << 26) +#define IMTTBCR_ORGN1_WT (2 << 26) +#define IMTTBCR_ORGN1_WB (3 << 26) +#define IMTTBCR_ORGN1_MASK (3 << 26) +#define IMTTBCR_IRGN1_NC (0 << 24) +#define IMTTBCR_IRGN1_WB_WA (1 << 24) +#define IMTTBCR_IRGN1_WT (2 << 24) +#define IMTTBCR_IRGN1_WB (3 << 24) +#define IMTTBCR_IRGN1_MASK (3 << 24) +#define IMTTBCR_TSZ1_MASK (7 << 16) +#define IMTTBCR_TSZ1_SHIFT 16 +#define IMTTBCR_SH0_NON_SHAREABLE (0 << 12) +#define IMTTBCR_SH0_OUTER_SHAREABLE (2 << 12) +#define IMTTBCR_SH0_INNER_SHAREABLE (3 << 12) +#define IMTTBCR_SH0_MASK (3 << 12) +#define IMTTBCR_ORGN0_NC (0 << 10) +#define IMTTBCR_ORGN0_WB_WA (1 << 10) +#define IMTTBCR_ORGN0_WT (2 << 10) +#define IMTTBCR_ORGN0_WB (3 << 10) +#define IMTTBCR_ORGN0_MASK (3 << 10) +#define IMTTBCR_IRGN0_NC (0 << 8) +#define IMTTBCR_IRGN0_WB_WA (1 << 8) +#define IMTTBCR_IRGN0_WT (2 << 8) +#define IMTTBCR_IRGN0_WB (3 << 8) +#define IMTTBCR_IRGN0_MASK (3 << 8) +#define IMTTBCR_SL0_LVL_2 (0 << 4) +#define IMTTBCR_SL0_LVL_1 (1 << 4) +#define IMTTBCR_TSZ0_MASK (7 << 0) +#define IMTTBCR_TSZ0_SHIFT O + +#define IMBUSCR 0x000c +#define IMBUSCR_DVM (1 << 2) +#define IMBUSCR_BUSSEL_SYS (0 << 0) +#define IMBUSCR_BUSSEL_CCI (1 << 0) +#define IMBUSCR_BUSSEL_IMCAAR (2 << 0) +#define IMBUSCR_BUSSEL_CCI_IMCAAR (3 << 0) +#define IMBUSCR_BUSSEL_MASK (3 << 0) + +#define IMTTLBR0 0x0010 +#define IMTTUBR0 0x0014 +#define IMTTLBR1 0x0018 +#define IMTTUBR1 0x001c + +#define IMSTR 0x0020 +#define IMSTR_ERRLVL_MASK (3 << 12) +#define IMSTR_ERRLVL_SHIFT 12 +#define IMSTR_ERRCODE_TLB_FORMAT (1 << 8) +#define IMSTR_ERRCODE_ACCESS_PERM (4 << 8) +#define IMSTR_ERRCODE_SECURE_ACCESS (5 << 8) +#define IMSTR_ERRCODE_MASK (7 << 8) +#define IMSTR_MHIT (1 << 4) +#define IMSTR_ABORT (1 << 2) +#define IMSTR_PF (1 << 1) +#define IMSTR_TF (1 << 0) + +#define IMMAIR0 0x0028 +#define IMMAIR1 0x002c +#define IMMAIR_ATTR_MASK 0xff +#define IMMAIR_ATTR_DEVICE 0x04 +#define IMMAIR_ATTR_NC 0x44 +#define IMMAIR_ATTR_WBRWA 0xff +#define IMMAIR_ATTR_SHIFT(n) ((n) << 3) +#define IMMAIR_ATTR_IDX_NC 0 +#define IMMAIR_ATTR_IDX_WBRWA 1 +#define IMMAIR_ATTR_IDX_DEV 2 + +#define IMEAR 0x0030 + +#define IMPCTR 0x0200 +#define IMPSTR 0x0208 +#define IMPEAR 0x020c +#define IMPMBA(n) (0x0280 + ((n) * 4)) +#define IMPMBD(n) (0x02c0 + ((n) * 4)) + +#define IMUCTR(n) (0x0300 + ((n) * 16)) +#define IMUCTR_FIXADDEN (1 << 31) +#define IMUCTR_FIXADD_MASK (0xff << 16) +#define IMUCTR_FIXADD_SHIFT 16 +#define IMUCTR_TTSEL_MMU(n) ((n) << 4) +#define IMUCTR_TTSEL_PMB (8 << 4) +#define IMUCTR_TTSEL_MASK (15 << 4) +#define IMUCTR_FLUSH (1 << 1) +#define IMUCTR_MMUEN (1 << 0) + +#define IMUASID(n) (0x0308 + ((n) * 16)) +#define IMUASID_ASID8_MASK (0xff << 8) +#define IMUASID_ASID8_SHIFT 8 +#define IMUASID_ASID0_MASK (0xff << 0) +#define IMUASID_ASID0_SHIFT 0 + +/* ----------------------------------------------------------------------------- + * Read/Write Access + */ + +static u32 ipmmu_read(struct ipmmu_vmsa_device *mmu, unsigned int offset) +{ + return ioread32(mmu->base + offset); +} + +static void ipmmu_write(struct ipmmu_vmsa_device *mmu, unsigned int offset, + u32 data) +{ + iowrite32(data, mmu->base + offset); +} + +static u32 ipmmu_ctx_read(struct ipmmu_vmsa_domain *domain, unsigned int reg) +{ + return ipmmu_read(domain->mmu, domain->context_id * IM_CTX_SIZE + reg); +} + +static void ipmmu_ctx_write(struct ipmmu_vmsa_domain *domain, unsigned int reg, + u32 data) +{ + ipmmu_write(domain->mmu, domain->context_id * IM_CTX_SIZE + reg, data); +} + +/* ----------------------------------------------------------------------------- + * TLB and microTLB Management + */ + +/* Wait for any pending TLB invalidations to complete */ +static void ipmmu_tlb_sync(struct ipmmu_vmsa_domain *domain) +{ + unsigned int count = 0; + + while (ipmmu_ctx_read(domain, IMCTR) & IMCTR_FLUSH) { + cpu_relax(); + if (++count == TLB_LOOP_TIMEOUT) { + dev_err_ratelimited(domain->mmu->dev, + "TLB sync timed out -- MMU may be deadlocked\n"); + return; + } + udelay(1); + } +} + +static void ipmmu_tlb_invalidate(struct ipmmu_vmsa_domain *domain) +{ + u32 reg; + + reg = ipmmu_ctx_read(domain, IMCTR); + reg |= IMCTR_FLUSH; + ipmmu_ctx_write(domain, IMCTR, reg); + + ipmmu_tlb_sync(domain); +} + +/* + * Enable MMU translation for the microTLB. + */ +static void ipmmu_utlb_enable(struct ipmmu_vmsa_domain *domain, + unsigned int utlb) +{ + struct ipmmu_vmsa_device *mmu = domain->mmu; + + /* + * TODO: Reference-count the microTLB as several bus masters can be + * connected to the same microTLB. + */ + + /* TODO: What should we set the ASID to ? */ + ipmmu_write(mmu, IMUASID(utlb), 0); + /* TODO: Do we need to flush the microTLB ? */ + ipmmu_write(mmu, IMUCTR(utlb), + IMUCTR_TTSEL_MMU(domain->context_id) | IMUCTR_FLUSH | + IMUCTR_MMUEN); +} + +/* + * Disable MMU translation for the microTLB. + */ +static void ipmmu_utlb_disable(struct ipmmu_vmsa_domain *domain, + unsigned int utlb) +{ + struct ipmmu_vmsa_device *mmu = domain->mmu; + + ipmmu_write(mmu, IMUCTR(utlb), 0); +} + +static void ipmmu_tlb_flush_all(void *cookie) +{ + struct ipmmu_vmsa_domain *domain = cookie; + + ipmmu_tlb_invalidate(domain); +} + +static void ipmmu_tlb_add_flush(unsigned long iova, size_t size, bool leaf, + void *cookie) +{ + /* The hardware doesn't support selective TLB flush. */ +} + +static void ipmmu_flush_pgtable(void *ptr, size_t size, void *cookie) +{ + unsigned long offset = (unsigned long)ptr & ~PAGE_MASK; + struct ipmmu_vmsa_domain *domain = cookie; + + /* + * TODO: Add support for coherent walk through CCI with DVM and remove + * cache handling. + */ + dma_map_page(domain->mmu->dev, virt_to_page(ptr), offset, size, + DMA_TO_DEVICE); +} + +static struct iommu_gather_ops ipmmu_gather_ops = { + .tlb_flush_all = ipmmu_tlb_flush_all, + .tlb_add_flush = ipmmu_tlb_add_flush, + .tlb_sync = ipmmu_tlb_flush_all, + .flush_pgtable = ipmmu_flush_pgtable, +}; + +/* ----------------------------------------------------------------------------- + * Domain/Context Management + */ + +static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) +{ + phys_addr_t ttbr; + + /* + * Allocate the page table operations. + * + * VMSA states in section B3.6.3 "Control of Secure or Non-secure memory + * access, Long-descriptor format" that the NStable bit being set in a + * table descriptor will result in the NStable and NS bits of all child + * entries being ignored and considered as being set. The IPMMU seems + * not to comply with this, as it generates a secure access page fault + * if any of the NStable and NS bits isn't set when running in + * non-secure mode. + */ + domain->cfg.quirks = IO_PGTABLE_QUIRK_ARM_NS; + domain->cfg.pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K, + domain->cfg.ias = 32; + domain->cfg.oas = 40; + domain->cfg.tlb = &ipmmu_gather_ops; + + domain->iop = alloc_io_pgtable_ops(ARM_32_LPAE_S1, &domain->cfg, + domain); + if (!domain->iop) + return -EINVAL; + + /* + * TODO: When adding support for multiple contexts, find an unused + * context. + */ + domain->context_id = 0; + + /* TTBR0 */ + ttbr = domain->cfg.arm_lpae_s1_cfg.ttbr[0]; + ipmmu_ctx_write(domain, IMTTLBR0, ttbr); + ipmmu_ctx_write(domain, IMTTUBR0, ttbr >> 32); + + /* + * TTBCR + * We use long descriptors with inner-shareable WBWA tables and allocate + * the whole 32-bit VA space to TTBR0. + */ + ipmmu_ctx_write(domain, IMTTBCR, IMTTBCR_EAE | + IMTTBCR_SH0_INNER_SHAREABLE | IMTTBCR_ORGN0_WB_WA | + IMTTBCR_IRGN0_WB_WA | IMTTBCR_SL0_LVL_1); + + /* MAIR0 */ + ipmmu_ctx_write(domain, IMMAIR0, domain->cfg.arm_lpae_s1_cfg.mair[0]); + + /* IMBUSCR */ + ipmmu_ctx_write(domain, IMBUSCR, + ipmmu_ctx_read(domain, IMBUSCR) & + ~(IMBUSCR_DVM | IMBUSCR_BUSSEL_MASK)); + + /* + * IMSTR + * Clear all interrupt flags. + */ + ipmmu_ctx_write(domain, IMSTR, ipmmu_ctx_read(domain, IMSTR)); + + /* + * IMCTR + * Enable the MMU and interrupt generation. The long-descriptor + * translation table format doesn't use TEX remapping. Don't enable AF + * software management as we have no use for it. Flush the TLB as + * required when modifying the context registers. + */ + ipmmu_ctx_write(domain, IMCTR, IMCTR_INTEN | IMCTR_FLUSH | IMCTR_MMUEN); + + return 0; +} + +static void ipmmu_domain_destroy_context(struct ipmmu_vmsa_domain *domain) +{ + /* + * Disable the context. Flush the TLB as required when modifying the + * context registers. + * + * TODO: Is TLB flush really needed ? + */ + ipmmu_ctx_write(domain, IMCTR, IMCTR_FLUSH); + ipmmu_tlb_sync(domain); +} + +/* ----------------------------------------------------------------------------- + * Fault Handling + */ + +static irqreturn_t ipmmu_domain_irq(struct ipmmu_vmsa_domain *domain) +{ + const u32 err_mask = IMSTR_MHIT | IMSTR_ABORT | IMSTR_PF | IMSTR_TF; + struct ipmmu_vmsa_device *mmu = domain->mmu; + u32 status; + u32 iova; + + status = ipmmu_ctx_read(domain, IMSTR); + if (!(status & err_mask)) + return IRQ_NONE; + + iova = ipmmu_ctx_read(domain, IMEAR); + + /* + * Clear the error status flags. Unlike traditional interrupt flag + * registers that must be cleared by writing 1, this status register + * seems to require 0. The error address register must be read before, + * otherwise its value will be 0. + */ + ipmmu_ctx_write(domain, IMSTR, 0); + + /* Log fatal errors. */ + if (status & IMSTR_MHIT) + dev_err_ratelimited(mmu->dev, "Multiple TLB hits @0x%08x\n", + iova); + if (status & IMSTR_ABORT) + dev_err_ratelimited(mmu->dev, "Page Table Walk Abort @0x%08x\n", + iova); + + if (!(status & (IMSTR_PF | IMSTR_TF))) + return IRQ_NONE; + + /* + * Try to handle page faults and translation faults. + * + * TODO: We need to look up the faulty device based on the I/O VA. Use + * the IOMMU device for now. + */ + if (!report_iommu_fault(&domain->io_domain, mmu->dev, iova, 0)) + return IRQ_HANDLED; + + dev_err_ratelimited(mmu->dev, + "Unhandled fault: status 0x%08x iova 0x%08x\n", + status, iova); + + return IRQ_HANDLED; +} + +static irqreturn_t ipmmu_irq(int irq, void *dev) +{ + struct ipmmu_vmsa_device *mmu = dev; + struct iommu_domain *io_domain; + struct ipmmu_vmsa_domain *domain; + + if (!mmu->mapping) + return IRQ_NONE; + + io_domain = mmu->mapping->domain; + domain = to_vmsa_domain(io_domain); + + return ipmmu_domain_irq(domain); +} + +/* ----------------------------------------------------------------------------- + * IOMMU Operations + */ + +static struct iommu_domain *ipmmu_domain_alloc(unsigned type) +{ + struct ipmmu_vmsa_domain *domain; + + if (type != IOMMU_DOMAIN_UNMANAGED) + return NULL; + + domain = kzalloc(sizeof(*domain), GFP_KERNEL); + if (!domain) + return NULL; + + spin_lock_init(&domain->lock); + + return &domain->io_domain; +} + +static void ipmmu_domain_free(struct iommu_domain *io_domain) +{ + struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain); + + /* + * Free the domain resources. We assume that all devices have already + * been detached. + */ + ipmmu_domain_destroy_context(domain); + free_io_pgtable_ops(domain->iop); + kfree(domain); +} + +static int ipmmu_attach_device(struct iommu_domain *io_domain, + struct device *dev) +{ + struct ipmmu_vmsa_archdata *archdata = dev->archdata.iommu; + struct ipmmu_vmsa_device *mmu = archdata->mmu; + struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain); + unsigned long flags; + unsigned int i; + int ret = 0; + + if (!mmu) { + dev_err(dev, "Cannot attach to IPMMU\n"); + return -ENXIO; + } + + spin_lock_irqsave(&domain->lock, flags); + + if (!domain->mmu) { + /* The domain hasn't been used yet, initialize it. */ + domain->mmu = mmu; + ret = ipmmu_domain_init_context(domain); + } else if (domain->mmu != mmu) { + /* + * Something is wrong, we can't attach two devices using + * different IOMMUs to the same domain. + */ + dev_err(dev, "Can't attach IPMMU %s to domain on IPMMU %s\n", + dev_name(mmu->dev), dev_name(domain->mmu->dev)); + ret = -EINVAL; + } + + spin_unlock_irqrestore(&domain->lock, flags); + + if (ret < 0) + return ret; + + for (i = 0; i < archdata->num_utlbs; ++i) + ipmmu_utlb_enable(domain, archdata->utlbs[i]); + + return 0; +} + +static void ipmmu_detach_device(struct iommu_domain *io_domain, + struct device *dev) +{ + struct ipmmu_vmsa_archdata *archdata = dev->archdata.iommu; + struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain); + unsigned int i; + + for (i = 0; i < archdata->num_utlbs; ++i) + ipmmu_utlb_disable(domain, archdata->utlbs[i]); + + /* + * TODO: Optimize by disabling the context when no device is attached. + */ +} + +static int ipmmu_map(struct iommu_domain *io_domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot) +{ + struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain); + + if (!domain) + return -ENODEV; + + return domain->iop->map(domain->iop, iova, paddr, size, prot); +} + +static size_t ipmmu_unmap(struct iommu_domain *io_domain, unsigned long iova, + size_t size) +{ + struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain); + + return domain->iop->unmap(domain->iop, iova, size); +} + +static phys_addr_t ipmmu_iova_to_phys(struct iommu_domain *io_domain, + dma_addr_t iova) +{ + struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain); + + /* TODO: Is locking needed ? */ + + return domain->iop->iova_to_phys(domain->iop, iova); +} + +static int ipmmu_find_utlbs(struct ipmmu_vmsa_device *mmu, struct device *dev, + unsigned int *utlbs, unsigned int num_utlbs) +{ + unsigned int i; + + for (i = 0; i < num_utlbs; ++i) { + struct of_phandle_args args; + int ret; + + ret = of_parse_phandle_with_args(dev->of_node, "iommus", + "#iommu-cells", i, &args); + if (ret < 0) + return ret; + + of_node_put(args.np); + + if (args.np != mmu->dev->of_node || args.args_count != 1) + return -EINVAL; + + utlbs[i] = args.args[0]; + } + + return 0; +} + +static int ipmmu_add_device(struct device *dev) +{ + struct ipmmu_vmsa_archdata *archdata; + struct ipmmu_vmsa_device *mmu; + struct iommu_group *group = NULL; + unsigned int *utlbs; + unsigned int i; + int num_utlbs; + int ret = -ENODEV; + + if (dev->archdata.iommu) { + dev_warn(dev, "IOMMU driver already assigned to device %s\n", + dev_name(dev)); + return -EINVAL; + } + + /* Find the master corresponding to the device. */ + + num_utlbs = of_count_phandle_with_args(dev->of_node, "iommus", + "#iommu-cells"); + if (num_utlbs < 0) + return -ENODEV; + + utlbs = kcalloc(num_utlbs, sizeof(*utlbs), GFP_KERNEL); + if (!utlbs) + return -ENOMEM; + + spin_lock(&ipmmu_devices_lock); + + list_for_each_entry(mmu, &ipmmu_devices, list) { + ret = ipmmu_find_utlbs(mmu, dev, utlbs, num_utlbs); + if (!ret) { + /* + * TODO Take a reference to the MMU to protect + * against device removal. + */ + break; + } + } + + spin_unlock(&ipmmu_devices_lock); + + if (ret < 0) + return -ENODEV; + + for (i = 0; i < num_utlbs; ++i) { + if (utlbs[i] >= mmu->num_utlbs) { + ret = -EINVAL; + goto error; + } + } + + /* Create a device group and add the device to it. */ + group = iommu_group_alloc(); + if (IS_ERR(group)) { + dev_err(dev, "Failed to allocate IOMMU group\n"); + ret = PTR_ERR(group); + goto error; + } + + ret = iommu_group_add_device(group, dev); + iommu_group_put(group); + + if (ret < 0) { + dev_err(dev, "Failed to add device to IPMMU group\n"); + group = NULL; + goto error; + } + + archdata = kzalloc(sizeof(*archdata), GFP_KERNEL); + if (!archdata) { + ret = -ENOMEM; + goto error; + } + + archdata->mmu = mmu; + archdata->utlbs = utlbs; + archdata->num_utlbs = num_utlbs; + dev->archdata.iommu = archdata; + + /* + * Create the ARM mapping, used by the ARM DMA mapping core to allocate + * VAs. This will allocate a corresponding IOMMU domain. + * + * TODO: + * - Create one mapping per context (TLB). + * - Make the mapping size configurable ? We currently use a 2GB mapping + * at a 1GB offset to ensure that NULL VAs will fault. + */ + if (!mmu->mapping) { + struct dma_iommu_mapping *mapping; + + mapping = arm_iommu_create_mapping(&platform_bus_type, + SZ_1G, SZ_2G); + if (IS_ERR(mapping)) { + dev_err(mmu->dev, "failed to create ARM IOMMU mapping\n"); + ret = PTR_ERR(mapping); + goto error; + } + + mmu->mapping = mapping; + } + + /* Attach the ARM VA mapping to the device. */ + ret = arm_iommu_attach_device(dev, mmu->mapping); + if (ret < 0) { + dev_err(dev, "Failed to attach device to VA mapping\n"); + goto error; + } + + return 0; + +error: + arm_iommu_release_mapping(mmu->mapping); + + kfree(dev->archdata.iommu); + kfree(utlbs); + + dev->archdata.iommu = NULL; + + if (!IS_ERR_OR_NULL(group)) + iommu_group_remove_device(dev); + + return ret; +} + +static void ipmmu_remove_device(struct device *dev) +{ + struct ipmmu_vmsa_archdata *archdata = dev->archdata.iommu; + + arm_iommu_detach_device(dev); + iommu_group_remove_device(dev); + + kfree(archdata->utlbs); + kfree(archdata); + + dev->archdata.iommu = NULL; +} + +static const struct iommu_ops ipmmu_ops = { + .domain_alloc = ipmmu_domain_alloc, + .domain_free = ipmmu_domain_free, + .attach_dev = ipmmu_attach_device, + .detach_dev = ipmmu_detach_device, + .map = ipmmu_map, + .unmap = ipmmu_unmap, + .map_sg = default_iommu_map_sg, + .iova_to_phys = ipmmu_iova_to_phys, + .add_device = ipmmu_add_device, + .remove_device = ipmmu_remove_device, + .pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K, +}; + +/* ----------------------------------------------------------------------------- + * Probe/remove and init + */ + +static void ipmmu_device_reset(struct ipmmu_vmsa_device *mmu) +{ + unsigned int i; + + /* Disable all contexts. */ + for (i = 0; i < 4; ++i) + ipmmu_write(mmu, i * IM_CTX_SIZE + IMCTR, 0); +} + +static int ipmmu_probe(struct platform_device *pdev) +{ + struct ipmmu_vmsa_device *mmu; + struct resource *res; + int irq; + int ret; + + if (!IS_ENABLED(CONFIG_OF) && !pdev->dev.platform_data) { + dev_err(&pdev->dev, "missing platform data\n"); + return -EINVAL; + } + + mmu = devm_kzalloc(&pdev->dev, sizeof(*mmu), GFP_KERNEL); + if (!mmu) { + dev_err(&pdev->dev, "cannot allocate device data\n"); + return -ENOMEM; + } + + mmu->dev = &pdev->dev; + mmu->num_utlbs = 32; + + /* Map I/O memory and request IRQ. */ + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + mmu->base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(mmu->base)) + return PTR_ERR(mmu->base); + + /* + * The IPMMU has two register banks, for secure and non-secure modes. + * The bank mapped at the beginning of the IPMMU address space + * corresponds to the running mode of the CPU. When running in secure + * mode the non-secure register bank is also available at an offset. + * + * Secure mode operation isn't clearly documented and is thus currently + * not implemented in the driver. Furthermore, preliminary tests of + * non-secure operation with the main register bank were not successful. + * Offset the registers base unconditionally to point to the non-secure + * alias space for now. + */ + mmu->base += IM_NS_ALIAS_OFFSET; + + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + dev_err(&pdev->dev, "no IRQ found\n"); + return irq; + } + + ret = devm_request_irq(&pdev->dev, irq, ipmmu_irq, 0, + dev_name(&pdev->dev), mmu); + if (ret < 0) { + dev_err(&pdev->dev, "failed to request IRQ %d\n", irq); + return ret; + } + + ipmmu_device_reset(mmu); + + /* + * We can't create the ARM mapping here as it requires the bus to have + * an IOMMU, which only happens when bus_set_iommu() is called in + * ipmmu_init() after the probe function returns. + */ + + spin_lock(&ipmmu_devices_lock); + list_add(&mmu->list, &ipmmu_devices); + spin_unlock(&ipmmu_devices_lock); + + platform_set_drvdata(pdev, mmu); + + return 0; +} + +static int ipmmu_remove(struct platform_device *pdev) +{ + struct ipmmu_vmsa_device *mmu = platform_get_drvdata(pdev); + + spin_lock(&ipmmu_devices_lock); + list_del(&mmu->list); + spin_unlock(&ipmmu_devices_lock); + + arm_iommu_release_mapping(mmu->mapping); + + ipmmu_device_reset(mmu); + + return 0; +} + +static const struct of_device_id ipmmu_of_ids[] = { + { .compatible = "renesas,ipmmu-vmsa", }, + { } +}; + +static struct platform_driver ipmmu_driver = { + .driver = { + .name = "ipmmu-vmsa", + .of_match_table = of_match_ptr(ipmmu_of_ids), + }, + .probe = ipmmu_probe, + .remove = ipmmu_remove, +}; + +static int __init ipmmu_init(void) +{ + int ret; + + ret = platform_driver_register(&ipmmu_driver); + if (ret < 0) + return ret; + + if (!iommu_present(&platform_bus_type)) + bus_set_iommu(&platform_bus_type, &ipmmu_ops); + + return 0; +} + +static void __exit ipmmu_exit(void) +{ + return platform_driver_unregister(&ipmmu_driver); +} + +subsys_initcall(ipmmu_init); +module_exit(ipmmu_exit); + +MODULE_DESCRIPTION("IOMMU API for Renesas VMSA-compatible IPMMU"); +MODULE_AUTHOR("Laurent Pinchart <laurent.pinchart@ideasonboard.com>"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c new file mode 100644 index 000000000..390079ee1 --- /dev/null +++ b/drivers/iommu/irq_remapping.c @@ -0,0 +1,366 @@ +#include <linux/seq_file.h> +#include <linux/cpumask.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/errno.h> +#include <linux/msi.h> +#include <linux/irq.h> +#include <linux/pci.h> + +#include <asm/hw_irq.h> +#include <asm/irq_remapping.h> +#include <asm/processor.h> +#include <asm/x86_init.h> +#include <asm/apic.h> +#include <asm/hpet.h> + +#include "irq_remapping.h" + +int irq_remapping_enabled; +int irq_remap_broken; +int disable_sourceid_checking; +int no_x2apic_optout; + +static int disable_irq_remap; +static struct irq_remap_ops *remap_ops; + +static int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec); +static int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq, + int index, int sub_handle); +static int set_remapped_irq_affinity(struct irq_data *data, + const struct cpumask *mask, + bool force); + +static bool irq_remapped(struct irq_cfg *cfg) +{ + return (cfg->remapped == 1); +} + +static void irq_remapping_disable_io_apic(void) +{ + /* + * With interrupt-remapping, for now we will use virtual wire A + * mode, as virtual wire B is little complex (need to configure + * both IOAPIC RTE as well as interrupt-remapping table entry). + * As this gets called during crash dump, keep this simple for + * now. + */ + if (cpu_has_apic || apic_from_smp_config()) + disconnect_bsp_APIC(0); +} + +static int do_setup_msi_irqs(struct pci_dev *dev, int nvec) +{ + int ret, sub_handle, nvec_pow2, index = 0; + unsigned int irq; + struct msi_desc *msidesc; + + msidesc = list_entry(dev->msi_list.next, struct msi_desc, list); + + irq = irq_alloc_hwirqs(nvec, dev_to_node(&dev->dev)); + if (irq == 0) + return -ENOSPC; + + nvec_pow2 = __roundup_pow_of_two(nvec); + for (sub_handle = 0; sub_handle < nvec; sub_handle++) { + if (!sub_handle) { + index = msi_alloc_remapped_irq(dev, irq, nvec_pow2); + if (index < 0) { + ret = index; + goto error; + } + } else { + ret = msi_setup_remapped_irq(dev, irq + sub_handle, + index, sub_handle); + if (ret < 0) + goto error; + } + ret = setup_msi_irq(dev, msidesc, irq, sub_handle); + if (ret < 0) + goto error; + } + return 0; + +error: + irq_free_hwirqs(irq, nvec); + + /* + * Restore altered MSI descriptor fields and prevent just destroyed + * IRQs from tearing down again in default_teardown_msi_irqs() + */ + msidesc->irq = 0; + + return ret; +} + +static int do_setup_msix_irqs(struct pci_dev *dev, int nvec) +{ + int node, ret, sub_handle, index = 0; + struct msi_desc *msidesc; + unsigned int irq; + + node = dev_to_node(&dev->dev); + sub_handle = 0; + + list_for_each_entry(msidesc, &dev->msi_list, list) { + + irq = irq_alloc_hwirq(node); + if (irq == 0) + return -1; + + if (sub_handle == 0) + ret = index = msi_alloc_remapped_irq(dev, irq, nvec); + else + ret = msi_setup_remapped_irq(dev, irq, index, sub_handle); + + if (ret < 0) + goto error; + + ret = setup_msi_irq(dev, msidesc, irq, 0); + if (ret < 0) + goto error; + + sub_handle += 1; + irq += 1; + } + + return 0; + +error: + irq_free_hwirq(irq); + return ret; +} + +static int irq_remapping_setup_msi_irqs(struct pci_dev *dev, + int nvec, int type) +{ + if (type == PCI_CAP_ID_MSI) + return do_setup_msi_irqs(dev, nvec); + else + return do_setup_msix_irqs(dev, nvec); +} + +static void eoi_ioapic_pin_remapped(int apic, int pin, int vector) +{ + /* + * Intr-remapping uses pin number as the virtual vector + * in the RTE. Actual vector is programmed in + * intr-remapping table entry. Hence for the io-apic + * EOI we use the pin number. + */ + io_apic_eoi(apic, pin); +} + +static void __init irq_remapping_modify_x86_ops(void) +{ + x86_io_apic_ops.disable = irq_remapping_disable_io_apic; + x86_io_apic_ops.set_affinity = set_remapped_irq_affinity; + x86_io_apic_ops.setup_entry = setup_ioapic_remapped_entry; + x86_io_apic_ops.eoi_ioapic_pin = eoi_ioapic_pin_remapped; + x86_msi.setup_msi_irqs = irq_remapping_setup_msi_irqs; + x86_msi.setup_hpet_msi = setup_hpet_msi_remapped; + x86_msi.compose_msi_msg = compose_remapped_msi_msg; +} + +static __init int setup_nointremap(char *str) +{ + disable_irq_remap = 1; + return 0; +} +early_param("nointremap", setup_nointremap); + +static __init int setup_irqremap(char *str) +{ + if (!str) + return -EINVAL; + + while (*str) { + if (!strncmp(str, "on", 2)) + disable_irq_remap = 0; + else if (!strncmp(str, "off", 3)) + disable_irq_remap = 1; + else if (!strncmp(str, "nosid", 5)) + disable_sourceid_checking = 1; + else if (!strncmp(str, "no_x2apic_optout", 16)) + no_x2apic_optout = 1; + + str += strcspn(str, ","); + while (*str == ',') + str++; + } + + return 0; +} +early_param("intremap", setup_irqremap); + +void set_irq_remapping_broken(void) +{ + irq_remap_broken = 1; +} + +int __init irq_remapping_prepare(void) +{ + if (disable_irq_remap) + return -ENOSYS; + + if (intel_irq_remap_ops.prepare() == 0) + remap_ops = &intel_irq_remap_ops; + else if (IS_ENABLED(CONFIG_AMD_IOMMU) && + amd_iommu_irq_ops.prepare() == 0) + remap_ops = &amd_iommu_irq_ops; + else + return -ENOSYS; + + return 0; +} + +int __init irq_remapping_enable(void) +{ + int ret; + + if (!remap_ops->enable) + return -ENODEV; + + ret = remap_ops->enable(); + + if (irq_remapping_enabled) + irq_remapping_modify_x86_ops(); + + return ret; +} + +void irq_remapping_disable(void) +{ + if (irq_remapping_enabled && remap_ops->disable) + remap_ops->disable(); +} + +int irq_remapping_reenable(int mode) +{ + if (irq_remapping_enabled && remap_ops->reenable) + return remap_ops->reenable(mode); + + return 0; +} + +int __init irq_remap_enable_fault_handling(void) +{ + if (!irq_remapping_enabled) + return 0; + + if (!remap_ops->enable_faulting) + return -ENODEV; + + return remap_ops->enable_faulting(); +} + +int setup_ioapic_remapped_entry(int irq, + struct IO_APIC_route_entry *entry, + unsigned int destination, int vector, + struct io_apic_irq_attr *attr) +{ + if (!remap_ops->setup_ioapic_entry) + return -ENODEV; + + return remap_ops->setup_ioapic_entry(irq, entry, destination, + vector, attr); +} + +static int set_remapped_irq_affinity(struct irq_data *data, + const struct cpumask *mask, bool force) +{ + if (!config_enabled(CONFIG_SMP) || !remap_ops->set_affinity) + return 0; + + return remap_ops->set_affinity(data, mask, force); +} + +void free_remapped_irq(int irq) +{ + struct irq_cfg *cfg = irq_cfg(irq); + + if (irq_remapped(cfg) && remap_ops->free_irq) + remap_ops->free_irq(irq); +} + +void compose_remapped_msi_msg(struct pci_dev *pdev, + unsigned int irq, unsigned int dest, + struct msi_msg *msg, u8 hpet_id) +{ + struct irq_cfg *cfg = irq_cfg(irq); + + if (!irq_remapped(cfg)) + native_compose_msi_msg(pdev, irq, dest, msg, hpet_id); + else if (remap_ops->compose_msi_msg) + remap_ops->compose_msi_msg(pdev, irq, dest, msg, hpet_id); +} + +static int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec) +{ + if (!remap_ops->msi_alloc_irq) + return -ENODEV; + + return remap_ops->msi_alloc_irq(pdev, irq, nvec); +} + +static int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq, + int index, int sub_handle) +{ + if (!remap_ops->msi_setup_irq) + return -ENODEV; + + return remap_ops->msi_setup_irq(pdev, irq, index, sub_handle); +} + +int setup_hpet_msi_remapped(unsigned int irq, unsigned int id) +{ + int ret; + + if (!remap_ops->alloc_hpet_msi) + return -ENODEV; + + ret = remap_ops->alloc_hpet_msi(irq, id); + if (ret) + return -EINVAL; + + return default_setup_hpet_msi(irq, id); +} + +void panic_if_irq_remap(const char *msg) +{ + if (irq_remapping_enabled) + panic(msg); +} + +static void ir_ack_apic_edge(struct irq_data *data) +{ + ack_APIC_irq(); +} + +static void ir_ack_apic_level(struct irq_data *data) +{ + ack_APIC_irq(); + eoi_ioapic_irq(data->irq, irqd_cfg(data)); +} + +static void ir_print_prefix(struct irq_data *data, struct seq_file *p) +{ + seq_printf(p, " IR-%s", data->chip->name); +} + +void irq_remap_modify_chip_defaults(struct irq_chip *chip) +{ + chip->irq_print_chip = ir_print_prefix; + chip->irq_ack = ir_ack_apic_edge; + chip->irq_eoi = ir_ack_apic_level; + chip->irq_set_affinity = x86_io_apic_ops.set_affinity; +} + +bool setup_remapped_irq(int irq, struct irq_cfg *cfg, struct irq_chip *chip) +{ + if (!irq_remapped(cfg)) + return false; + irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); + irq_remap_modify_chip_defaults(chip); + return true; +} diff --git a/drivers/iommu/irq_remapping.h b/drivers/iommu/irq_remapping.h new file mode 100644 index 000000000..7c70cc29f --- /dev/null +++ b/drivers/iommu/irq_remapping.h @@ -0,0 +1,92 @@ +/* + * Copyright (C) 2012 Advanced Micro Devices, Inc. + * Author: Joerg Roedel <jroedel@suse.de> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * This header file contains stuff that is shared between different interrupt + * remapping drivers but with no need to be visible outside of the IOMMU layer. + */ + +#ifndef __IRQ_REMAPPING_H +#define __IRQ_REMAPPING_H + +#ifdef CONFIG_IRQ_REMAP + +struct IO_APIC_route_entry; +struct io_apic_irq_attr; +struct irq_data; +struct cpumask; +struct pci_dev; +struct msi_msg; + +extern int irq_remap_broken; +extern int disable_sourceid_checking; +extern int no_x2apic_optout; +extern int irq_remapping_enabled; + +struct irq_remap_ops { + /* Initializes hardware and makes it ready for remapping interrupts */ + int (*prepare)(void); + + /* Enables the remapping hardware */ + int (*enable)(void); + + /* Disables the remapping hardware */ + void (*disable)(void); + + /* Reenables the remapping hardware */ + int (*reenable)(int); + + /* Enable fault handling */ + int (*enable_faulting)(void); + + /* IO-APIC setup routine */ + int (*setup_ioapic_entry)(int irq, struct IO_APIC_route_entry *, + unsigned int, int, + struct io_apic_irq_attr *); + + /* Set the CPU affinity of a remapped interrupt */ + int (*set_affinity)(struct irq_data *data, const struct cpumask *mask, + bool force); + + /* Free an IRQ */ + int (*free_irq)(int); + + /* Create MSI msg to use for interrupt remapping */ + void (*compose_msi_msg)(struct pci_dev *, + unsigned int, unsigned int, + struct msi_msg *, u8); + + /* Allocate remapping resources for MSI */ + int (*msi_alloc_irq)(struct pci_dev *, int, int); + + /* Setup the remapped MSI irq */ + int (*msi_setup_irq)(struct pci_dev *, unsigned int, int, int); + + /* Setup interrupt remapping for an HPET MSI */ + int (*alloc_hpet_msi)(unsigned int, unsigned int); +}; + +extern struct irq_remap_ops intel_irq_remap_ops; +extern struct irq_remap_ops amd_iommu_irq_ops; + +#else /* CONFIG_IRQ_REMAP */ + +#define irq_remapping_enabled 0 +#define irq_remap_broken 0 + +#endif /* CONFIG_IRQ_REMAP */ + +#endif /* __IRQ_REMAPPING_H */ diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c new file mode 100644 index 000000000..15a206381 --- /dev/null +++ b/drivers/iommu/msm_iommu.c @@ -0,0 +1,735 @@ +/* Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/errno.h> +#include <linux/io.h> +#include <linux/interrupt.h> +#include <linux/list.h> +#include <linux/spinlock.h> +#include <linux/slab.h> +#include <linux/iommu.h> +#include <linux/clk.h> + +#include <asm/cacheflush.h> +#include <asm/sizes.h> + +#include "msm_iommu_hw-8xxx.h" +#include "msm_iommu.h" + +#define MRC(reg, processor, op1, crn, crm, op2) \ +__asm__ __volatile__ ( \ +" mrc " #processor "," #op1 ", %0," #crn "," #crm "," #op2 "\n" \ +: "=r" (reg)) + +#define RCP15_PRRR(reg) MRC(reg, p15, 0, c10, c2, 0) +#define RCP15_NMRR(reg) MRC(reg, p15, 0, c10, c2, 1) + +/* bitmap of the page sizes currently supported */ +#define MSM_IOMMU_PGSIZES (SZ_4K | SZ_64K | SZ_1M | SZ_16M) + +static int msm_iommu_tex_class[4]; + +DEFINE_SPINLOCK(msm_iommu_lock); + +struct msm_priv { + unsigned long *pgtable; + struct list_head list_attached; + struct iommu_domain domain; +}; + +static struct msm_priv *to_msm_priv(struct iommu_domain *dom) +{ + return container_of(dom, struct msm_priv, domain); +} + +static int __enable_clocks(struct msm_iommu_drvdata *drvdata) +{ + int ret; + + ret = clk_enable(drvdata->pclk); + if (ret) + goto fail; + + if (drvdata->clk) { + ret = clk_enable(drvdata->clk); + if (ret) + clk_disable(drvdata->pclk); + } +fail: + return ret; +} + +static void __disable_clocks(struct msm_iommu_drvdata *drvdata) +{ + clk_disable(drvdata->clk); + clk_disable(drvdata->pclk); +} + +static int __flush_iotlb(struct iommu_domain *domain) +{ + struct msm_priv *priv = to_msm_priv(domain); + struct msm_iommu_drvdata *iommu_drvdata; + struct msm_iommu_ctx_drvdata *ctx_drvdata; + int ret = 0; +#ifndef CONFIG_IOMMU_PGTABLES_L2 + unsigned long *fl_table = priv->pgtable; + int i; + + if (!list_empty(&priv->list_attached)) { + dmac_flush_range(fl_table, fl_table + SZ_16K); + + for (i = 0; i < NUM_FL_PTE; i++) + if ((fl_table[i] & 0x03) == FL_TYPE_TABLE) { + void *sl_table = __va(fl_table[i] & + FL_BASE_MASK); + dmac_flush_range(sl_table, sl_table + SZ_4K); + } + } +#endif + + list_for_each_entry(ctx_drvdata, &priv->list_attached, attached_elm) { + if (!ctx_drvdata->pdev || !ctx_drvdata->pdev->dev.parent) + BUG(); + + iommu_drvdata = dev_get_drvdata(ctx_drvdata->pdev->dev.parent); + BUG_ON(!iommu_drvdata); + + ret = __enable_clocks(iommu_drvdata); + if (ret) + goto fail; + + SET_CTX_TLBIALL(iommu_drvdata->base, ctx_drvdata->num, 0); + __disable_clocks(iommu_drvdata); + } +fail: + return ret; +} + +static void __reset_context(void __iomem *base, int ctx) +{ + SET_BPRCOSH(base, ctx, 0); + SET_BPRCISH(base, ctx, 0); + SET_BPRCNSH(base, ctx, 0); + SET_BPSHCFG(base, ctx, 0); + SET_BPMTCFG(base, ctx, 0); + SET_ACTLR(base, ctx, 0); + SET_SCTLR(base, ctx, 0); + SET_FSRRESTORE(base, ctx, 0); + SET_TTBR0(base, ctx, 0); + SET_TTBR1(base, ctx, 0); + SET_TTBCR(base, ctx, 0); + SET_BFBCR(base, ctx, 0); + SET_PAR(base, ctx, 0); + SET_FAR(base, ctx, 0); + SET_CTX_TLBIALL(base, ctx, 0); + SET_TLBFLPTER(base, ctx, 0); + SET_TLBSLPTER(base, ctx, 0); + SET_TLBLKCR(base, ctx, 0); + SET_PRRR(base, ctx, 0); + SET_NMRR(base, ctx, 0); +} + +static void __program_context(void __iomem *base, int ctx, phys_addr_t pgtable) +{ + unsigned int prrr, nmrr; + __reset_context(base, ctx); + + /* Set up HTW mode */ + /* TLB miss configuration: perform HTW on miss */ + SET_TLBMCFG(base, ctx, 0x3); + + /* V2P configuration: HTW for access */ + SET_V2PCFG(base, ctx, 0x3); + + SET_TTBCR(base, ctx, 0); + SET_TTBR0_PA(base, ctx, (pgtable >> 14)); + + /* Invalidate the TLB for this context */ + SET_CTX_TLBIALL(base, ctx, 0); + + /* Set interrupt number to "secure" interrupt */ + SET_IRPTNDX(base, ctx, 0); + + /* Enable context fault interrupt */ + SET_CFEIE(base, ctx, 1); + + /* Stall access on a context fault and let the handler deal with it */ + SET_CFCFG(base, ctx, 1); + + /* Redirect all cacheable requests to L2 slave port. */ + SET_RCISH(base, ctx, 1); + SET_RCOSH(base, ctx, 1); + SET_RCNSH(base, ctx, 1); + + /* Turn on TEX Remap */ + SET_TRE(base, ctx, 1); + + /* Set TEX remap attributes */ + RCP15_PRRR(prrr); + RCP15_NMRR(nmrr); + SET_PRRR(base, ctx, prrr); + SET_NMRR(base, ctx, nmrr); + + /* Turn on BFB prefetch */ + SET_BFBDFE(base, ctx, 1); + +#ifdef CONFIG_IOMMU_PGTABLES_L2 + /* Configure page tables as inner-cacheable and shareable to reduce + * the TLB miss penalty. + */ + SET_TTBR0_SH(base, ctx, 1); + SET_TTBR1_SH(base, ctx, 1); + + SET_TTBR0_NOS(base, ctx, 1); + SET_TTBR1_NOS(base, ctx, 1); + + SET_TTBR0_IRGNH(base, ctx, 0); /* WB, WA */ + SET_TTBR0_IRGNL(base, ctx, 1); + + SET_TTBR1_IRGNH(base, ctx, 0); /* WB, WA */ + SET_TTBR1_IRGNL(base, ctx, 1); + + SET_TTBR0_ORGN(base, ctx, 1); /* WB, WA */ + SET_TTBR1_ORGN(base, ctx, 1); /* WB, WA */ +#endif + + /* Enable the MMU */ + SET_M(base, ctx, 1); +} + +static struct iommu_domain *msm_iommu_domain_alloc(unsigned type) +{ + struct msm_priv *priv; + + if (type != IOMMU_DOMAIN_UNMANAGED) + return NULL; + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) + goto fail_nomem; + + INIT_LIST_HEAD(&priv->list_attached); + priv->pgtable = (unsigned long *)__get_free_pages(GFP_KERNEL, + get_order(SZ_16K)); + + if (!priv->pgtable) + goto fail_nomem; + + memset(priv->pgtable, 0, SZ_16K); + + priv->domain.geometry.aperture_start = 0; + priv->domain.geometry.aperture_end = (1ULL << 32) - 1; + priv->domain.geometry.force_aperture = true; + + return &priv->domain; + +fail_nomem: + kfree(priv); + return NULL; +} + +static void msm_iommu_domain_free(struct iommu_domain *domain) +{ + struct msm_priv *priv; + unsigned long flags; + unsigned long *fl_table; + int i; + + spin_lock_irqsave(&msm_iommu_lock, flags); + priv = to_msm_priv(domain); + + fl_table = priv->pgtable; + + for (i = 0; i < NUM_FL_PTE; i++) + if ((fl_table[i] & 0x03) == FL_TYPE_TABLE) + free_page((unsigned long) __va(((fl_table[i]) & + FL_BASE_MASK))); + + free_pages((unsigned long)priv->pgtable, get_order(SZ_16K)); + priv->pgtable = NULL; + + kfree(priv); + spin_unlock_irqrestore(&msm_iommu_lock, flags); +} + +static int msm_iommu_attach_dev(struct iommu_domain *domain, struct device *dev) +{ + struct msm_priv *priv; + struct msm_iommu_ctx_dev *ctx_dev; + struct msm_iommu_drvdata *iommu_drvdata; + struct msm_iommu_ctx_drvdata *ctx_drvdata; + struct msm_iommu_ctx_drvdata *tmp_drvdata; + int ret = 0; + unsigned long flags; + + spin_lock_irqsave(&msm_iommu_lock, flags); + + priv = to_msm_priv(domain); + + if (!dev) { + ret = -EINVAL; + goto fail; + } + + iommu_drvdata = dev_get_drvdata(dev->parent); + ctx_drvdata = dev_get_drvdata(dev); + ctx_dev = dev->platform_data; + + if (!iommu_drvdata || !ctx_drvdata || !ctx_dev) { + ret = -EINVAL; + goto fail; + } + + if (!list_empty(&ctx_drvdata->attached_elm)) { + ret = -EBUSY; + goto fail; + } + + list_for_each_entry(tmp_drvdata, &priv->list_attached, attached_elm) + if (tmp_drvdata == ctx_drvdata) { + ret = -EBUSY; + goto fail; + } + + ret = __enable_clocks(iommu_drvdata); + if (ret) + goto fail; + + __program_context(iommu_drvdata->base, ctx_dev->num, + __pa(priv->pgtable)); + + __disable_clocks(iommu_drvdata); + list_add(&(ctx_drvdata->attached_elm), &priv->list_attached); + ret = __flush_iotlb(domain); + +fail: + spin_unlock_irqrestore(&msm_iommu_lock, flags); + return ret; +} + +static void msm_iommu_detach_dev(struct iommu_domain *domain, + struct device *dev) +{ + struct msm_priv *priv; + struct msm_iommu_ctx_dev *ctx_dev; + struct msm_iommu_drvdata *iommu_drvdata; + struct msm_iommu_ctx_drvdata *ctx_drvdata; + unsigned long flags; + int ret; + + spin_lock_irqsave(&msm_iommu_lock, flags); + priv = to_msm_priv(domain); + + if (!dev) + goto fail; + + iommu_drvdata = dev_get_drvdata(dev->parent); + ctx_drvdata = dev_get_drvdata(dev); + ctx_dev = dev->platform_data; + + if (!iommu_drvdata || !ctx_drvdata || !ctx_dev) + goto fail; + + ret = __flush_iotlb(domain); + if (ret) + goto fail; + + ret = __enable_clocks(iommu_drvdata); + if (ret) + goto fail; + + __reset_context(iommu_drvdata->base, ctx_dev->num); + __disable_clocks(iommu_drvdata); + list_del_init(&ctx_drvdata->attached_elm); + +fail: + spin_unlock_irqrestore(&msm_iommu_lock, flags); +} + +static int msm_iommu_map(struct iommu_domain *domain, unsigned long va, + phys_addr_t pa, size_t len, int prot) +{ + struct msm_priv *priv; + unsigned long flags; + unsigned long *fl_table; + unsigned long *fl_pte; + unsigned long fl_offset; + unsigned long *sl_table; + unsigned long *sl_pte; + unsigned long sl_offset; + unsigned int pgprot; + int ret = 0, tex, sh; + + spin_lock_irqsave(&msm_iommu_lock, flags); + + sh = (prot & MSM_IOMMU_ATTR_SH) ? 1 : 0; + tex = msm_iommu_tex_class[prot & MSM_IOMMU_CP_MASK]; + + if (tex < 0 || tex > NUM_TEX_CLASS - 1) { + ret = -EINVAL; + goto fail; + } + + priv = to_msm_priv(domain); + + fl_table = priv->pgtable; + + if (len != SZ_16M && len != SZ_1M && + len != SZ_64K && len != SZ_4K) { + pr_debug("Bad size: %d\n", len); + ret = -EINVAL; + goto fail; + } + + if (!fl_table) { + pr_debug("Null page table\n"); + ret = -EINVAL; + goto fail; + } + + if (len == SZ_16M || len == SZ_1M) { + pgprot = sh ? FL_SHARED : 0; + pgprot |= tex & 0x01 ? FL_BUFFERABLE : 0; + pgprot |= tex & 0x02 ? FL_CACHEABLE : 0; + pgprot |= tex & 0x04 ? FL_TEX0 : 0; + } else { + pgprot = sh ? SL_SHARED : 0; + pgprot |= tex & 0x01 ? SL_BUFFERABLE : 0; + pgprot |= tex & 0x02 ? SL_CACHEABLE : 0; + pgprot |= tex & 0x04 ? SL_TEX0 : 0; + } + + fl_offset = FL_OFFSET(va); /* Upper 12 bits */ + fl_pte = fl_table + fl_offset; /* int pointers, 4 bytes */ + + if (len == SZ_16M) { + int i = 0; + for (i = 0; i < 16; i++) + *(fl_pte+i) = (pa & 0xFF000000) | FL_SUPERSECTION | + FL_AP_READ | FL_AP_WRITE | FL_TYPE_SECT | + FL_SHARED | FL_NG | pgprot; + } + + if (len == SZ_1M) + *fl_pte = (pa & 0xFFF00000) | FL_AP_READ | FL_AP_WRITE | FL_NG | + FL_TYPE_SECT | FL_SHARED | pgprot; + + /* Need a 2nd level table */ + if ((len == SZ_4K || len == SZ_64K) && (*fl_pte) == 0) { + unsigned long *sl; + sl = (unsigned long *) __get_free_pages(GFP_ATOMIC, + get_order(SZ_4K)); + + if (!sl) { + pr_debug("Could not allocate second level table\n"); + ret = -ENOMEM; + goto fail; + } + + memset(sl, 0, SZ_4K); + *fl_pte = ((((int)__pa(sl)) & FL_BASE_MASK) | FL_TYPE_TABLE); + } + + sl_table = (unsigned long *) __va(((*fl_pte) & FL_BASE_MASK)); + sl_offset = SL_OFFSET(va); + sl_pte = sl_table + sl_offset; + + + if (len == SZ_4K) + *sl_pte = (pa & SL_BASE_MASK_SMALL) | SL_AP0 | SL_AP1 | SL_NG | + SL_SHARED | SL_TYPE_SMALL | pgprot; + + if (len == SZ_64K) { + int i; + + for (i = 0; i < 16; i++) + *(sl_pte+i) = (pa & SL_BASE_MASK_LARGE) | SL_AP0 | + SL_NG | SL_AP1 | SL_SHARED | SL_TYPE_LARGE | pgprot; + } + + ret = __flush_iotlb(domain); +fail: + spin_unlock_irqrestore(&msm_iommu_lock, flags); + return ret; +} + +static size_t msm_iommu_unmap(struct iommu_domain *domain, unsigned long va, + size_t len) +{ + struct msm_priv *priv; + unsigned long flags; + unsigned long *fl_table; + unsigned long *fl_pte; + unsigned long fl_offset; + unsigned long *sl_table; + unsigned long *sl_pte; + unsigned long sl_offset; + int i, ret = 0; + + spin_lock_irqsave(&msm_iommu_lock, flags); + + priv = to_msm_priv(domain); + + fl_table = priv->pgtable; + + if (len != SZ_16M && len != SZ_1M && + len != SZ_64K && len != SZ_4K) { + pr_debug("Bad length: %d\n", len); + goto fail; + } + + if (!fl_table) { + pr_debug("Null page table\n"); + goto fail; + } + + fl_offset = FL_OFFSET(va); /* Upper 12 bits */ + fl_pte = fl_table + fl_offset; /* int pointers, 4 bytes */ + + if (*fl_pte == 0) { + pr_debug("First level PTE is 0\n"); + goto fail; + } + + /* Unmap supersection */ + if (len == SZ_16M) + for (i = 0; i < 16; i++) + *(fl_pte+i) = 0; + + if (len == SZ_1M) + *fl_pte = 0; + + sl_table = (unsigned long *) __va(((*fl_pte) & FL_BASE_MASK)); + sl_offset = SL_OFFSET(va); + sl_pte = sl_table + sl_offset; + + if (len == SZ_64K) { + for (i = 0; i < 16; i++) + *(sl_pte+i) = 0; + } + + if (len == SZ_4K) + *sl_pte = 0; + + if (len == SZ_4K || len == SZ_64K) { + int used = 0; + + for (i = 0; i < NUM_SL_PTE; i++) + if (sl_table[i]) + used = 1; + if (!used) { + free_page((unsigned long)sl_table); + *fl_pte = 0; + } + } + + ret = __flush_iotlb(domain); + +fail: + spin_unlock_irqrestore(&msm_iommu_lock, flags); + + /* the IOMMU API requires us to return how many bytes were unmapped */ + len = ret ? 0 : len; + return len; +} + +static phys_addr_t msm_iommu_iova_to_phys(struct iommu_domain *domain, + dma_addr_t va) +{ + struct msm_priv *priv; + struct msm_iommu_drvdata *iommu_drvdata; + struct msm_iommu_ctx_drvdata *ctx_drvdata; + unsigned int par; + unsigned long flags; + void __iomem *base; + phys_addr_t ret = 0; + int ctx; + + spin_lock_irqsave(&msm_iommu_lock, flags); + + priv = to_msm_priv(domain); + if (list_empty(&priv->list_attached)) + goto fail; + + ctx_drvdata = list_entry(priv->list_attached.next, + struct msm_iommu_ctx_drvdata, attached_elm); + iommu_drvdata = dev_get_drvdata(ctx_drvdata->pdev->dev.parent); + + base = iommu_drvdata->base; + ctx = ctx_drvdata->num; + + ret = __enable_clocks(iommu_drvdata); + if (ret) + goto fail; + + /* Invalidate context TLB */ + SET_CTX_TLBIALL(base, ctx, 0); + SET_V2PPR(base, ctx, va & V2Pxx_VA); + + par = GET_PAR(base, ctx); + + /* We are dealing with a supersection */ + if (GET_NOFAULT_SS(base, ctx)) + ret = (par & 0xFF000000) | (va & 0x00FFFFFF); + else /* Upper 20 bits from PAR, lower 12 from VA */ + ret = (par & 0xFFFFF000) | (va & 0x00000FFF); + + if (GET_FAULT(base, ctx)) + ret = 0; + + __disable_clocks(iommu_drvdata); +fail: + spin_unlock_irqrestore(&msm_iommu_lock, flags); + return ret; +} + +static bool msm_iommu_capable(enum iommu_cap cap) +{ + return false; +} + +static void print_ctx_regs(void __iomem *base, int ctx) +{ + unsigned int fsr = GET_FSR(base, ctx); + pr_err("FAR = %08x PAR = %08x\n", + GET_FAR(base, ctx), GET_PAR(base, ctx)); + pr_err("FSR = %08x [%s%s%s%s%s%s%s%s%s%s]\n", fsr, + (fsr & 0x02) ? "TF " : "", + (fsr & 0x04) ? "AFF " : "", + (fsr & 0x08) ? "APF " : "", + (fsr & 0x10) ? "TLBMF " : "", + (fsr & 0x20) ? "HTWDEEF " : "", + (fsr & 0x40) ? "HTWSEEF " : "", + (fsr & 0x80) ? "MHF " : "", + (fsr & 0x10000) ? "SL " : "", + (fsr & 0x40000000) ? "SS " : "", + (fsr & 0x80000000) ? "MULTI " : ""); + + pr_err("FSYNR0 = %08x FSYNR1 = %08x\n", + GET_FSYNR0(base, ctx), GET_FSYNR1(base, ctx)); + pr_err("TTBR0 = %08x TTBR1 = %08x\n", + GET_TTBR0(base, ctx), GET_TTBR1(base, ctx)); + pr_err("SCTLR = %08x ACTLR = %08x\n", + GET_SCTLR(base, ctx), GET_ACTLR(base, ctx)); + pr_err("PRRR = %08x NMRR = %08x\n", + GET_PRRR(base, ctx), GET_NMRR(base, ctx)); +} + +irqreturn_t msm_iommu_fault_handler(int irq, void *dev_id) +{ + struct msm_iommu_drvdata *drvdata = dev_id; + void __iomem *base; + unsigned int fsr; + int i, ret; + + spin_lock(&msm_iommu_lock); + + if (!drvdata) { + pr_err("Invalid device ID in context interrupt handler\n"); + goto fail; + } + + base = drvdata->base; + + pr_err("Unexpected IOMMU page fault!\n"); + pr_err("base = %08x\n", (unsigned int) base); + + ret = __enable_clocks(drvdata); + if (ret) + goto fail; + + for (i = 0; i < drvdata->ncb; i++) { + fsr = GET_FSR(base, i); + if (fsr) { + pr_err("Fault occurred in context %d.\n", i); + pr_err("Interesting registers:\n"); + print_ctx_regs(base, i); + SET_FSR(base, i, 0x4000000F); + } + } + __disable_clocks(drvdata); +fail: + spin_unlock(&msm_iommu_lock); + return 0; +} + +static const struct iommu_ops msm_iommu_ops = { + .capable = msm_iommu_capable, + .domain_alloc = msm_iommu_domain_alloc, + .domain_free = msm_iommu_domain_free, + .attach_dev = msm_iommu_attach_dev, + .detach_dev = msm_iommu_detach_dev, + .map = msm_iommu_map, + .unmap = msm_iommu_unmap, + .map_sg = default_iommu_map_sg, + .iova_to_phys = msm_iommu_iova_to_phys, + .pgsize_bitmap = MSM_IOMMU_PGSIZES, +}; + +static int __init get_tex_class(int icp, int ocp, int mt, int nos) +{ + int i = 0; + unsigned int prrr = 0; + unsigned int nmrr = 0; + int c_icp, c_ocp, c_mt, c_nos; + + RCP15_PRRR(prrr); + RCP15_NMRR(nmrr); + + for (i = 0; i < NUM_TEX_CLASS; i++) { + c_nos = PRRR_NOS(prrr, i); + c_mt = PRRR_MT(prrr, i); + c_icp = NMRR_ICP(nmrr, i); + c_ocp = NMRR_OCP(nmrr, i); + + if (icp == c_icp && ocp == c_ocp && c_mt == mt && c_nos == nos) + return i; + } + + return -ENODEV; +} + +static void __init setup_iommu_tex_classes(void) +{ + msm_iommu_tex_class[MSM_IOMMU_ATTR_NONCACHED] = + get_tex_class(CP_NONCACHED, CP_NONCACHED, MT_NORMAL, 1); + + msm_iommu_tex_class[MSM_IOMMU_ATTR_CACHED_WB_WA] = + get_tex_class(CP_WB_WA, CP_WB_WA, MT_NORMAL, 1); + + msm_iommu_tex_class[MSM_IOMMU_ATTR_CACHED_WB_NWA] = + get_tex_class(CP_WB_NWA, CP_WB_NWA, MT_NORMAL, 1); + + msm_iommu_tex_class[MSM_IOMMU_ATTR_CACHED_WT] = + get_tex_class(CP_WT, CP_WT, MT_NORMAL, 1); +} + +static int __init msm_iommu_init(void) +{ + setup_iommu_tex_classes(); + bus_set_iommu(&platform_bus_type, &msm_iommu_ops); + return 0; +} + +subsys_initcall(msm_iommu_init); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Stepan Moskovchenko <stepanm@codeaurora.org>"); diff --git a/drivers/iommu/msm_iommu.h b/drivers/iommu/msm_iommu.h new file mode 100644 index 000000000..5c7c955e6 --- /dev/null +++ b/drivers/iommu/msm_iommu.h @@ -0,0 +1,120 @@ +/* Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +#ifndef MSM_IOMMU_H +#define MSM_IOMMU_H + +#include <linux/interrupt.h> +#include <linux/clk.h> + +/* Sharability attributes of MSM IOMMU mappings */ +#define MSM_IOMMU_ATTR_NON_SH 0x0 +#define MSM_IOMMU_ATTR_SH 0x4 + +/* Cacheability attributes of MSM IOMMU mappings */ +#define MSM_IOMMU_ATTR_NONCACHED 0x0 +#define MSM_IOMMU_ATTR_CACHED_WB_WA 0x1 +#define MSM_IOMMU_ATTR_CACHED_WB_NWA 0x2 +#define MSM_IOMMU_ATTR_CACHED_WT 0x3 + +/* Mask for the cache policy attribute */ +#define MSM_IOMMU_CP_MASK 0x03 + +/* Maximum number of Machine IDs that we are allowing to be mapped to the same + * context bank. The number of MIDs mapped to the same CB does not affect + * performance, but there is a practical limit on how many distinct MIDs may + * be present. These mappings are typically determined at design time and are + * not expected to change at run time. + */ +#define MAX_NUM_MIDS 32 + +/** + * struct msm_iommu_dev - a single IOMMU hardware instance + * name Human-readable name given to this IOMMU HW instance + * ncb Number of context banks present on this IOMMU HW instance + */ +struct msm_iommu_dev { + const char *name; + int ncb; +}; + +/** + * struct msm_iommu_ctx_dev - an IOMMU context bank instance + * name Human-readable name given to this context bank + * num Index of this context bank within the hardware + * mids List of Machine IDs that are to be mapped into this context + * bank, terminated by -1. The MID is a set of signals on the + * AXI bus that identifies the function associated with a specific + * memory request. (See ARM spec). + */ +struct msm_iommu_ctx_dev { + const char *name; + int num; + int mids[MAX_NUM_MIDS]; +}; + + +/** + * struct msm_iommu_drvdata - A single IOMMU hardware instance + * @base: IOMMU config port base address (VA) + * @ncb The number of contexts on this IOMMU + * @irq: Interrupt number + * @clk: The bus clock for this IOMMU hardware instance + * @pclk: The clock for the IOMMU bus interconnect + * + * A msm_iommu_drvdata holds the global driver data about a single piece + * of an IOMMU hardware instance. + */ +struct msm_iommu_drvdata { + void __iomem *base; + int irq; + int ncb; + struct clk *clk; + struct clk *pclk; +}; + +/** + * struct msm_iommu_ctx_drvdata - an IOMMU context bank instance + * @num: Hardware context number of this context + * @pdev: Platform device associated wit this HW instance + * @attached_elm: List element for domains to track which devices are + * attached to them + * + * A msm_iommu_ctx_drvdata holds the driver data for a single context bank + * within each IOMMU hardware instance + */ +struct msm_iommu_ctx_drvdata { + int num; + struct platform_device *pdev; + struct list_head attached_elm; +}; + +/* + * Look up an IOMMU context device by its context name. NULL if none found. + * Useful for testing and drivers that do not yet fully have IOMMU stuff in + * their platform devices. + */ +struct device *msm_iommu_get_ctx(const char *ctx_name); + +/* + * Interrupt handler for the IOMMU context fault interrupt. Hooking the + * interrupt is not supported in the API yet, but this will print an error + * message and dump useful IOMMU registers. + */ +irqreturn_t msm_iommu_fault_handler(int irq, void *dev_id); + +#endif diff --git a/drivers/iommu/msm_iommu_dev.c b/drivers/iommu/msm_iommu_dev.c new file mode 100644 index 000000000..b6d01f97e --- /dev/null +++ b/drivers/iommu/msm_iommu_dev.c @@ -0,0 +1,392 @@ +/* Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/io.h> +#include <linux/clk.h> +#include <linux/iommu.h> +#include <linux/interrupt.h> +#include <linux/err.h> +#include <linux/slab.h> + +#include "msm_iommu_hw-8xxx.h" +#include "msm_iommu.h" + +struct iommu_ctx_iter_data { + /* input */ + const char *name; + + /* output */ + struct device *dev; +}; + +static struct platform_device *msm_iommu_root_dev; + +static int each_iommu_ctx(struct device *dev, void *data) +{ + struct iommu_ctx_iter_data *res = data; + struct msm_iommu_ctx_dev *c = dev->platform_data; + + if (!res || !c || !c->name || !res->name) + return -EINVAL; + + if (!strcmp(res->name, c->name)) { + res->dev = dev; + return 1; + } + return 0; +} + +static int each_iommu(struct device *dev, void *data) +{ + return device_for_each_child(dev, data, each_iommu_ctx); +} + +struct device *msm_iommu_get_ctx(const char *ctx_name) +{ + struct iommu_ctx_iter_data r; + int found; + + if (!msm_iommu_root_dev) { + pr_err("No root IOMMU device.\n"); + goto fail; + } + + r.name = ctx_name; + found = device_for_each_child(&msm_iommu_root_dev->dev, &r, each_iommu); + + if (!found) { + pr_err("Could not find context <%s>\n", ctx_name); + goto fail; + } + + return r.dev; +fail: + return NULL; +} +EXPORT_SYMBOL(msm_iommu_get_ctx); + +static void msm_iommu_reset(void __iomem *base, int ncb) +{ + int ctx; + + SET_RPUE(base, 0); + SET_RPUEIE(base, 0); + SET_ESRRESTORE(base, 0); + SET_TBE(base, 0); + SET_CR(base, 0); + SET_SPDMBE(base, 0); + SET_TESTBUSCR(base, 0); + SET_TLBRSW(base, 0); + SET_GLOBAL_TLBIALL(base, 0); + SET_RPU_ACR(base, 0); + SET_TLBLKCRWE(base, 1); + + for (ctx = 0; ctx < ncb; ctx++) { + SET_BPRCOSH(base, ctx, 0); + SET_BPRCISH(base, ctx, 0); + SET_BPRCNSH(base, ctx, 0); + SET_BPSHCFG(base, ctx, 0); + SET_BPMTCFG(base, ctx, 0); + SET_ACTLR(base, ctx, 0); + SET_SCTLR(base, ctx, 0); + SET_FSRRESTORE(base, ctx, 0); + SET_TTBR0(base, ctx, 0); + SET_TTBR1(base, ctx, 0); + SET_TTBCR(base, ctx, 0); + SET_BFBCR(base, ctx, 0); + SET_PAR(base, ctx, 0); + SET_FAR(base, ctx, 0); + SET_CTX_TLBIALL(base, ctx, 0); + SET_TLBFLPTER(base, ctx, 0); + SET_TLBSLPTER(base, ctx, 0); + SET_TLBLKCR(base, ctx, 0); + SET_PRRR(base, ctx, 0); + SET_NMRR(base, ctx, 0); + SET_CONTEXTIDR(base, ctx, 0); + } +} + +static int msm_iommu_probe(struct platform_device *pdev) +{ + struct resource *r; + struct clk *iommu_clk; + struct clk *iommu_pclk; + struct msm_iommu_drvdata *drvdata; + struct msm_iommu_dev *iommu_dev = dev_get_platdata(&pdev->dev); + void __iomem *regs_base; + int ret, irq, par; + + if (pdev->id == -1) { + msm_iommu_root_dev = pdev; + return 0; + } + + drvdata = kzalloc(sizeof(*drvdata), GFP_KERNEL); + + if (!drvdata) { + ret = -ENOMEM; + goto fail; + } + + if (!iommu_dev) { + ret = -ENODEV; + goto fail; + } + + iommu_pclk = clk_get(NULL, "smmu_pclk"); + if (IS_ERR(iommu_pclk)) { + ret = -ENODEV; + goto fail; + } + + ret = clk_prepare_enable(iommu_pclk); + if (ret) + goto fail_enable; + + iommu_clk = clk_get(&pdev->dev, "iommu_clk"); + + if (!IS_ERR(iommu_clk)) { + if (clk_get_rate(iommu_clk) == 0) + clk_set_rate(iommu_clk, 1); + + ret = clk_prepare_enable(iommu_clk); + if (ret) { + clk_put(iommu_clk); + goto fail_pclk; + } + } else + iommu_clk = NULL; + + r = platform_get_resource_byname(pdev, IORESOURCE_MEM, "physbase"); + regs_base = devm_ioremap_resource(&pdev->dev, r); + if (IS_ERR(regs_base)) { + ret = PTR_ERR(regs_base); + goto fail_clk; + } + + irq = platform_get_irq_byname(pdev, "secure_irq"); + if (irq < 0) { + ret = -ENODEV; + goto fail_clk; + } + + msm_iommu_reset(regs_base, iommu_dev->ncb); + + SET_M(regs_base, 0, 1); + SET_PAR(regs_base, 0, 0); + SET_V2PCFG(regs_base, 0, 1); + SET_V2PPR(regs_base, 0, 0); + par = GET_PAR(regs_base, 0); + SET_V2PCFG(regs_base, 0, 0); + SET_M(regs_base, 0, 0); + + if (!par) { + pr_err("%s: Invalid PAR value detected\n", iommu_dev->name); + ret = -ENODEV; + goto fail_clk; + } + + ret = request_irq(irq, msm_iommu_fault_handler, 0, + "msm_iommu_secure_irpt_handler", drvdata); + if (ret) { + pr_err("Request IRQ %d failed with ret=%d\n", irq, ret); + goto fail_clk; + } + + + drvdata->pclk = iommu_pclk; + drvdata->clk = iommu_clk; + drvdata->base = regs_base; + drvdata->irq = irq; + drvdata->ncb = iommu_dev->ncb; + + pr_info("device %s mapped at %p, irq %d with %d ctx banks\n", + iommu_dev->name, regs_base, irq, iommu_dev->ncb); + + platform_set_drvdata(pdev, drvdata); + + clk_disable(iommu_clk); + + clk_disable(iommu_pclk); + + return 0; +fail_clk: + if (iommu_clk) { + clk_disable(iommu_clk); + clk_put(iommu_clk); + } +fail_pclk: + clk_disable_unprepare(iommu_pclk); +fail_enable: + clk_put(iommu_pclk); +fail: + kfree(drvdata); + return ret; +} + +static int msm_iommu_remove(struct platform_device *pdev) +{ + struct msm_iommu_drvdata *drv = NULL; + + drv = platform_get_drvdata(pdev); + if (drv) { + if (drv->clk) { + clk_unprepare(drv->clk); + clk_put(drv->clk); + } + clk_unprepare(drv->pclk); + clk_put(drv->pclk); + memset(drv, 0, sizeof(*drv)); + kfree(drv); + } + return 0; +} + +static int msm_iommu_ctx_probe(struct platform_device *pdev) +{ + struct msm_iommu_ctx_dev *c = dev_get_platdata(&pdev->dev); + struct msm_iommu_drvdata *drvdata; + struct msm_iommu_ctx_drvdata *ctx_drvdata; + int i, ret; + + if (!c || !pdev->dev.parent) + return -EINVAL; + + drvdata = dev_get_drvdata(pdev->dev.parent); + if (!drvdata) + return -ENODEV; + + ctx_drvdata = kzalloc(sizeof(*ctx_drvdata), GFP_KERNEL); + if (!ctx_drvdata) + return -ENOMEM; + + ctx_drvdata->num = c->num; + ctx_drvdata->pdev = pdev; + + INIT_LIST_HEAD(&ctx_drvdata->attached_elm); + platform_set_drvdata(pdev, ctx_drvdata); + + ret = clk_prepare_enable(drvdata->pclk); + if (ret) + goto fail; + + if (drvdata->clk) { + ret = clk_prepare_enable(drvdata->clk); + if (ret) { + clk_disable_unprepare(drvdata->pclk); + goto fail; + } + } + + /* Program the M2V tables for this context */ + for (i = 0; i < MAX_NUM_MIDS; i++) { + int mid = c->mids[i]; + if (mid == -1) + break; + + SET_M2VCBR_N(drvdata->base, mid, 0); + SET_CBACR_N(drvdata->base, c->num, 0); + + /* Set VMID = 0 */ + SET_VMID(drvdata->base, mid, 0); + + /* Set the context number for that MID to this context */ + SET_CBNDX(drvdata->base, mid, c->num); + + /* Set MID associated with this context bank to 0*/ + SET_CBVMID(drvdata->base, c->num, 0); + + /* Set the ASID for TLB tagging for this context */ + SET_CONTEXTIDR_ASID(drvdata->base, c->num, c->num); + + /* Set security bit override to be Non-secure */ + SET_NSCFG(drvdata->base, mid, 3); + } + + clk_disable(drvdata->clk); + clk_disable(drvdata->pclk); + + dev_info(&pdev->dev, "context %s using bank %d\n", c->name, c->num); + return 0; +fail: + kfree(ctx_drvdata); + return ret; +} + +static int msm_iommu_ctx_remove(struct platform_device *pdev) +{ + struct msm_iommu_ctx_drvdata *drv = NULL; + drv = platform_get_drvdata(pdev); + if (drv) { + memset(drv, 0, sizeof(struct msm_iommu_ctx_drvdata)); + kfree(drv); + } + return 0; +} + +static struct platform_driver msm_iommu_driver = { + .driver = { + .name = "msm_iommu", + }, + .probe = msm_iommu_probe, + .remove = msm_iommu_remove, +}; + +static struct platform_driver msm_iommu_ctx_driver = { + .driver = { + .name = "msm_iommu_ctx", + }, + .probe = msm_iommu_ctx_probe, + .remove = msm_iommu_ctx_remove, +}; + +static int __init msm_iommu_driver_init(void) +{ + int ret; + ret = platform_driver_register(&msm_iommu_driver); + if (ret != 0) { + pr_err("Failed to register IOMMU driver\n"); + goto error; + } + + ret = platform_driver_register(&msm_iommu_ctx_driver); + if (ret != 0) { + platform_driver_unregister(&msm_iommu_driver); + pr_err("Failed to register IOMMU context driver\n"); + goto error; + } + +error: + return ret; +} + +static void __exit msm_iommu_driver_exit(void) +{ + platform_driver_unregister(&msm_iommu_ctx_driver); + platform_driver_unregister(&msm_iommu_driver); +} + +subsys_initcall(msm_iommu_driver_init); +module_exit(msm_iommu_driver_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Stepan Moskovchenko <stepanm@codeaurora.org>"); diff --git a/drivers/iommu/msm_iommu_hw-8xxx.h b/drivers/iommu/msm_iommu_hw-8xxx.h new file mode 100644 index 000000000..fc160101d --- /dev/null +++ b/drivers/iommu/msm_iommu_hw-8xxx.h @@ -0,0 +1,1865 @@ +/* Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +#ifndef __ARCH_ARM_MACH_MSM_IOMMU_HW_8XXX_H +#define __ARCH_ARM_MACH_MSM_IOMMU_HW_8XXX_H + +#define CTX_SHIFT 12 + +#define GET_GLOBAL_REG(reg, base) (readl((base) + (reg))) +#define GET_CTX_REG(reg, base, ctx) \ + (readl((base) + (reg) + ((ctx) << CTX_SHIFT))) + +#define SET_GLOBAL_REG(reg, base, val) writel((val), ((base) + (reg))) + +#define SET_CTX_REG(reg, base, ctx, val) \ + writel((val), ((base) + (reg) + ((ctx) << CTX_SHIFT))) + +/* Wrappers for numbered registers */ +#define SET_GLOBAL_REG_N(b, n, r, v) SET_GLOBAL_REG(b, ((r) + (n << 2)), (v)) +#define GET_GLOBAL_REG_N(b, n, r) GET_GLOBAL_REG(b, ((r) + (n << 2))) + +/* Field wrappers */ +#define GET_GLOBAL_FIELD(b, r, F) GET_FIELD(((b) + (r)), F##_MASK, F##_SHIFT) +#define GET_CONTEXT_FIELD(b, c, r, F) \ + GET_FIELD(((b) + (r) + ((c) << CTX_SHIFT)), F##_MASK, F##_SHIFT) + +#define SET_GLOBAL_FIELD(b, r, F, v) \ + SET_FIELD(((b) + (r)), F##_MASK, F##_SHIFT, (v)) +#define SET_CONTEXT_FIELD(b, c, r, F, v) \ + SET_FIELD(((b) + (r) + ((c) << CTX_SHIFT)), F##_MASK, F##_SHIFT, (v)) + +#define GET_FIELD(addr, mask, shift) ((readl(addr) >> (shift)) & (mask)) + +#define SET_FIELD(addr, mask, shift, v) \ +do { \ + int t = readl(addr); \ + writel((t & ~((mask) << (shift))) + (((v) & (mask)) << (shift)), addr);\ +} while (0) + + +#define NUM_FL_PTE 4096 +#define NUM_SL_PTE 256 +#define NUM_TEX_CLASS 8 + +/* First-level page table bits */ +#define FL_BASE_MASK 0xFFFFFC00 +#define FL_TYPE_TABLE (1 << 0) +#define FL_TYPE_SECT (2 << 0) +#define FL_SUPERSECTION (1 << 18) +#define FL_AP_WRITE (1 << 10) +#define FL_AP_READ (1 << 11) +#define FL_SHARED (1 << 16) +#define FL_BUFFERABLE (1 << 2) +#define FL_CACHEABLE (1 << 3) +#define FL_TEX0 (1 << 12) +#define FL_OFFSET(va) (((va) & 0xFFF00000) >> 20) +#define FL_NG (1 << 17) + +/* Second-level page table bits */ +#define SL_BASE_MASK_LARGE 0xFFFF0000 +#define SL_BASE_MASK_SMALL 0xFFFFF000 +#define SL_TYPE_LARGE (1 << 0) +#define SL_TYPE_SMALL (2 << 0) +#define SL_AP0 (1 << 4) +#define SL_AP1 (2 << 4) +#define SL_SHARED (1 << 10) +#define SL_BUFFERABLE (1 << 2) +#define SL_CACHEABLE (1 << 3) +#define SL_TEX0 (1 << 6) +#define SL_OFFSET(va) (((va) & 0xFF000) >> 12) +#define SL_NG (1 << 11) + +/* Memory type and cache policy attributes */ +#define MT_SO 0 +#define MT_DEV 1 +#define MT_NORMAL 2 +#define CP_NONCACHED 0 +#define CP_WB_WA 1 +#define CP_WT 2 +#define CP_WB_NWA 3 + +/* Global register setters / getters */ +#define SET_M2VCBR_N(b, N, v) SET_GLOBAL_REG_N(M2VCBR_N, N, (b), (v)) +#define SET_CBACR_N(b, N, v) SET_GLOBAL_REG_N(CBACR_N, N, (b), (v)) +#define SET_TLBRSW(b, v) SET_GLOBAL_REG(TLBRSW, (b), (v)) +#define SET_TLBTR0(b, v) SET_GLOBAL_REG(TLBTR0, (b), (v)) +#define SET_TLBTR1(b, v) SET_GLOBAL_REG(TLBTR1, (b), (v)) +#define SET_TLBTR2(b, v) SET_GLOBAL_REG(TLBTR2, (b), (v)) +#define SET_TESTBUSCR(b, v) SET_GLOBAL_REG(TESTBUSCR, (b), (v)) +#define SET_GLOBAL_TLBIALL(b, v) SET_GLOBAL_REG(GLOBAL_TLBIALL, (b), (v)) +#define SET_TLBIVMID(b, v) SET_GLOBAL_REG(TLBIVMID, (b), (v)) +#define SET_CR(b, v) SET_GLOBAL_REG(CR, (b), (v)) +#define SET_EAR(b, v) SET_GLOBAL_REG(EAR, (b), (v)) +#define SET_ESR(b, v) SET_GLOBAL_REG(ESR, (b), (v)) +#define SET_ESRRESTORE(b, v) SET_GLOBAL_REG(ESRRESTORE, (b), (v)) +#define SET_ESYNR0(b, v) SET_GLOBAL_REG(ESYNR0, (b), (v)) +#define SET_ESYNR1(b, v) SET_GLOBAL_REG(ESYNR1, (b), (v)) +#define SET_RPU_ACR(b, v) SET_GLOBAL_REG(RPU_ACR, (b), (v)) + +#define GET_M2VCBR_N(b, N) GET_GLOBAL_REG_N(M2VCBR_N, N, (b)) +#define GET_CBACR_N(b, N) GET_GLOBAL_REG_N(CBACR_N, N, (b)) +#define GET_TLBTR0(b) GET_GLOBAL_REG(TLBTR0, (b)) +#define GET_TLBTR1(b) GET_GLOBAL_REG(TLBTR1, (b)) +#define GET_TLBTR2(b) GET_GLOBAL_REG(TLBTR2, (b)) +#define GET_TESTBUSCR(b) GET_GLOBAL_REG(TESTBUSCR, (b)) +#define GET_GLOBAL_TLBIALL(b) GET_GLOBAL_REG(GLOBAL_TLBIALL, (b)) +#define GET_TLBIVMID(b) GET_GLOBAL_REG(TLBIVMID, (b)) +#define GET_CR(b) GET_GLOBAL_REG(CR, (b)) +#define GET_EAR(b) GET_GLOBAL_REG(EAR, (b)) +#define GET_ESR(b) GET_GLOBAL_REG(ESR, (b)) +#define GET_ESRRESTORE(b) GET_GLOBAL_REG(ESRRESTORE, (b)) +#define GET_ESYNR0(b) GET_GLOBAL_REG(ESYNR0, (b)) +#define GET_ESYNR1(b) GET_GLOBAL_REG(ESYNR1, (b)) +#define GET_REV(b) GET_GLOBAL_REG(REV, (b)) +#define GET_IDR(b) GET_GLOBAL_REG(IDR, (b)) +#define GET_RPU_ACR(b) GET_GLOBAL_REG(RPU_ACR, (b)) + + +/* Context register setters/getters */ +#define SET_SCTLR(b, c, v) SET_CTX_REG(SCTLR, (b), (c), (v)) +#define SET_ACTLR(b, c, v) SET_CTX_REG(ACTLR, (b), (c), (v)) +#define SET_CONTEXTIDR(b, c, v) SET_CTX_REG(CONTEXTIDR, (b), (c), (v)) +#define SET_TTBR0(b, c, v) SET_CTX_REG(TTBR0, (b), (c), (v)) +#define SET_TTBR1(b, c, v) SET_CTX_REG(TTBR1, (b), (c), (v)) +#define SET_TTBCR(b, c, v) SET_CTX_REG(TTBCR, (b), (c), (v)) +#define SET_PAR(b, c, v) SET_CTX_REG(PAR, (b), (c), (v)) +#define SET_FSR(b, c, v) SET_CTX_REG(FSR, (b), (c), (v)) +#define SET_FSRRESTORE(b, c, v) SET_CTX_REG(FSRRESTORE, (b), (c), (v)) +#define SET_FAR(b, c, v) SET_CTX_REG(FAR, (b), (c), (v)) +#define SET_FSYNR0(b, c, v) SET_CTX_REG(FSYNR0, (b), (c), (v)) +#define SET_FSYNR1(b, c, v) SET_CTX_REG(FSYNR1, (b), (c), (v)) +#define SET_PRRR(b, c, v) SET_CTX_REG(PRRR, (b), (c), (v)) +#define SET_NMRR(b, c, v) SET_CTX_REG(NMRR, (b), (c), (v)) +#define SET_TLBLKCR(b, c, v) SET_CTX_REG(TLBLCKR, (b), (c), (v)) +#define SET_V2PSR(b, c, v) SET_CTX_REG(V2PSR, (b), (c), (v)) +#define SET_TLBFLPTER(b, c, v) SET_CTX_REG(TLBFLPTER, (b), (c), (v)) +#define SET_TLBSLPTER(b, c, v) SET_CTX_REG(TLBSLPTER, (b), (c), (v)) +#define SET_BFBCR(b, c, v) SET_CTX_REG(BFBCR, (b), (c), (v)) +#define SET_CTX_TLBIALL(b, c, v) SET_CTX_REG(CTX_TLBIALL, (b), (c), (v)) +#define SET_TLBIASID(b, c, v) SET_CTX_REG(TLBIASID, (b), (c), (v)) +#define SET_TLBIVA(b, c, v) SET_CTX_REG(TLBIVA, (b), (c), (v)) +#define SET_TLBIVAA(b, c, v) SET_CTX_REG(TLBIVAA, (b), (c), (v)) +#define SET_V2PPR(b, c, v) SET_CTX_REG(V2PPR, (b), (c), (v)) +#define SET_V2PPW(b, c, v) SET_CTX_REG(V2PPW, (b), (c), (v)) +#define SET_V2PUR(b, c, v) SET_CTX_REG(V2PUR, (b), (c), (v)) +#define SET_V2PUW(b, c, v) SET_CTX_REG(V2PUW, (b), (c), (v)) +#define SET_RESUME(b, c, v) SET_CTX_REG(RESUME, (b), (c), (v)) + +#define GET_SCTLR(b, c) GET_CTX_REG(SCTLR, (b), (c)) +#define GET_ACTLR(b, c) GET_CTX_REG(ACTLR, (b), (c)) +#define GET_CONTEXTIDR(b, c) GET_CTX_REG(CONTEXTIDR, (b), (c)) +#define GET_TTBR0(b, c) GET_CTX_REG(TTBR0, (b), (c)) +#define GET_TTBR1(b, c) GET_CTX_REG(TTBR1, (b), (c)) +#define GET_TTBCR(b, c) GET_CTX_REG(TTBCR, (b), (c)) +#define GET_PAR(b, c) GET_CTX_REG(PAR, (b), (c)) +#define GET_FSR(b, c) GET_CTX_REG(FSR, (b), (c)) +#define GET_FSRRESTORE(b, c) GET_CTX_REG(FSRRESTORE, (b), (c)) +#define GET_FAR(b, c) GET_CTX_REG(FAR, (b), (c)) +#define GET_FSYNR0(b, c) GET_CTX_REG(FSYNR0, (b), (c)) +#define GET_FSYNR1(b, c) GET_CTX_REG(FSYNR1, (b), (c)) +#define GET_PRRR(b, c) GET_CTX_REG(PRRR, (b), (c)) +#define GET_NMRR(b, c) GET_CTX_REG(NMRR, (b), (c)) +#define GET_TLBLCKR(b, c) GET_CTX_REG(TLBLCKR, (b), (c)) +#define GET_V2PSR(b, c) GET_CTX_REG(V2PSR, (b), (c)) +#define GET_TLBFLPTER(b, c) GET_CTX_REG(TLBFLPTER, (b), (c)) +#define GET_TLBSLPTER(b, c) GET_CTX_REG(TLBSLPTER, (b), (c)) +#define GET_BFBCR(b, c) GET_CTX_REG(BFBCR, (b), (c)) +#define GET_CTX_TLBIALL(b, c) GET_CTX_REG(CTX_TLBIALL, (b), (c)) +#define GET_TLBIASID(b, c) GET_CTX_REG(TLBIASID, (b), (c)) +#define GET_TLBIVA(b, c) GET_CTX_REG(TLBIVA, (b), (c)) +#define GET_TLBIVAA(b, c) GET_CTX_REG(TLBIVAA, (b), (c)) +#define GET_V2PPR(b, c) GET_CTX_REG(V2PPR, (b), (c)) +#define GET_V2PPW(b, c) GET_CTX_REG(V2PPW, (b), (c)) +#define GET_V2PUR(b, c) GET_CTX_REG(V2PUR, (b), (c)) +#define GET_V2PUW(b, c) GET_CTX_REG(V2PUW, (b), (c)) +#define GET_RESUME(b, c) GET_CTX_REG(RESUME, (b), (c)) + + +/* Global field setters / getters */ +/* Global Field Setters: */ +/* CBACR_N */ +#define SET_RWVMID(b, n, v) SET_GLOBAL_FIELD(b, (n<<2)|(CBACR_N), RWVMID, v) +#define SET_RWE(b, n, v) SET_GLOBAL_FIELD(b, (n<<2)|(CBACR_N), RWE, v) +#define SET_RWGE(b, n, v) SET_GLOBAL_FIELD(b, (n<<2)|(CBACR_N), RWGE, v) +#define SET_CBVMID(b, n, v) SET_GLOBAL_FIELD(b, (n<<2)|(CBACR_N), CBVMID, v) +#define SET_IRPTNDX(b, n, v) SET_GLOBAL_FIELD(b, (n<<2)|(CBACR_N), IRPTNDX, v) + + +/* M2VCBR_N */ +#define SET_VMID(b, n, v) SET_GLOBAL_FIELD(b, (n<<2)|(M2VCBR_N), VMID, v) +#define SET_CBNDX(b, n, v) SET_GLOBAL_FIELD(b, (n<<2)|(M2VCBR_N), CBNDX, v) +#define SET_BYPASSD(b, n, v) SET_GLOBAL_FIELD(b, (n<<2)|(M2VCBR_N), BYPASSD, v) +#define SET_BPRCOSH(b, n, v) SET_GLOBAL_FIELD(b, (n<<2)|(M2VCBR_N), BPRCOSH, v) +#define SET_BPRCISH(b, n, v) SET_GLOBAL_FIELD(b, (n<<2)|(M2VCBR_N), BPRCISH, v) +#define SET_BPRCNSH(b, n, v) SET_GLOBAL_FIELD(b, (n<<2)|(M2VCBR_N), BPRCNSH, v) +#define SET_BPSHCFG(b, n, v) SET_GLOBAL_FIELD(b, (n<<2)|(M2VCBR_N), BPSHCFG, v) +#define SET_NSCFG(b, n, v) SET_GLOBAL_FIELD(b, (n<<2)|(M2VCBR_N), NSCFG, v) +#define SET_BPMTCFG(b, n, v) SET_GLOBAL_FIELD(b, (n<<2)|(M2VCBR_N), BPMTCFG, v) +#define SET_BPMEMTYPE(b, n, v) \ + SET_GLOBAL_FIELD(b, (n<<2)|(M2VCBR_N), BPMEMTYPE, v) + + +/* CR */ +#define SET_RPUE(b, v) SET_GLOBAL_FIELD(b, CR, RPUE, v) +#define SET_RPUERE(b, v) SET_GLOBAL_FIELD(b, CR, RPUERE, v) +#define SET_RPUEIE(b, v) SET_GLOBAL_FIELD(b, CR, RPUEIE, v) +#define SET_DCDEE(b, v) SET_GLOBAL_FIELD(b, CR, DCDEE, v) +#define SET_CLIENTPD(b, v) SET_GLOBAL_FIELD(b, CR, CLIENTPD, v) +#define SET_STALLD(b, v) SET_GLOBAL_FIELD(b, CR, STALLD, v) +#define SET_TLBLKCRWE(b, v) SET_GLOBAL_FIELD(b, CR, TLBLKCRWE, v) +#define SET_CR_TLBIALLCFG(b, v) SET_GLOBAL_FIELD(b, CR, CR_TLBIALLCFG, v) +#define SET_TLBIVMIDCFG(b, v) SET_GLOBAL_FIELD(b, CR, TLBIVMIDCFG, v) +#define SET_CR_HUME(b, v) SET_GLOBAL_FIELD(b, CR, CR_HUME, v) + + +/* ESR */ +#define SET_CFG(b, v) SET_GLOBAL_FIELD(b, ESR, CFG, v) +#define SET_BYPASS(b, v) SET_GLOBAL_FIELD(b, ESR, BYPASS, v) +#define SET_ESR_MULTI(b, v) SET_GLOBAL_FIELD(b, ESR, ESR_MULTI, v) + + +/* ESYNR0 */ +#define SET_ESYNR0_AMID(b, v) SET_GLOBAL_FIELD(b, ESYNR0, ESYNR0_AMID, v) +#define SET_ESYNR0_APID(b, v) SET_GLOBAL_FIELD(b, ESYNR0, ESYNR0_APID, v) +#define SET_ESYNR0_ABID(b, v) SET_GLOBAL_FIELD(b, ESYNR0, ESYNR0_ABID, v) +#define SET_ESYNR0_AVMID(b, v) SET_GLOBAL_FIELD(b, ESYNR0, ESYNR0_AVMID, v) +#define SET_ESYNR0_ATID(b, v) SET_GLOBAL_FIELD(b, ESYNR0, ESYNR0_ATID, v) + + +/* ESYNR1 */ +#define SET_ESYNR1_AMEMTYPE(b, v) \ + SET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_AMEMTYPE, v) +#define SET_ESYNR1_ASHARED(b, v) SET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_ASHARED, v) +#define SET_ESYNR1_AINNERSHARED(b, v) \ + SET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_AINNERSHARED, v) +#define SET_ESYNR1_APRIV(b, v) SET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_APRIV, v) +#define SET_ESYNR1_APROTNS(b, v) SET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_APROTNS, v) +#define SET_ESYNR1_AINST(b, v) SET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_AINST, v) +#define SET_ESYNR1_AWRITE(b, v) SET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_AWRITE, v) +#define SET_ESYNR1_ABURST(b, v) SET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_ABURST, v) +#define SET_ESYNR1_ALEN(b, v) SET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_ALEN, v) +#define SET_ESYNR1_ASIZE(b, v) SET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_ASIZE, v) +#define SET_ESYNR1_ALOCK(b, v) SET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_ALOCK, v) +#define SET_ESYNR1_AOOO(b, v) SET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_AOOO, v) +#define SET_ESYNR1_AFULL(b, v) SET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_AFULL, v) +#define SET_ESYNR1_AC(b, v) SET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_AC, v) +#define SET_ESYNR1_DCD(b, v) SET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_DCD, v) + + +/* TESTBUSCR */ +#define SET_TBE(b, v) SET_GLOBAL_FIELD(b, TESTBUSCR, TBE, v) +#define SET_SPDMBE(b, v) SET_GLOBAL_FIELD(b, TESTBUSCR, SPDMBE, v) +#define SET_WGSEL(b, v) SET_GLOBAL_FIELD(b, TESTBUSCR, WGSEL, v) +#define SET_TBLSEL(b, v) SET_GLOBAL_FIELD(b, TESTBUSCR, TBLSEL, v) +#define SET_TBHSEL(b, v) SET_GLOBAL_FIELD(b, TESTBUSCR, TBHSEL, v) +#define SET_SPDM0SEL(b, v) SET_GLOBAL_FIELD(b, TESTBUSCR, SPDM0SEL, v) +#define SET_SPDM1SEL(b, v) SET_GLOBAL_FIELD(b, TESTBUSCR, SPDM1SEL, v) +#define SET_SPDM2SEL(b, v) SET_GLOBAL_FIELD(b, TESTBUSCR, SPDM2SEL, v) +#define SET_SPDM3SEL(b, v) SET_GLOBAL_FIELD(b, TESTBUSCR, SPDM3SEL, v) + + +/* TLBIVMID */ +#define SET_TLBIVMID_VMID(b, v) SET_GLOBAL_FIELD(b, TLBIVMID, TLBIVMID_VMID, v) + + +/* TLBRSW */ +#define SET_TLBRSW_INDEX(b, v) SET_GLOBAL_FIELD(b, TLBRSW, TLBRSW_INDEX, v) +#define SET_TLBBFBS(b, v) SET_GLOBAL_FIELD(b, TLBRSW, TLBBFBS, v) + + +/* TLBTR0 */ +#define SET_PR(b, v) SET_GLOBAL_FIELD(b, TLBTR0, PR, v) +#define SET_PW(b, v) SET_GLOBAL_FIELD(b, TLBTR0, PW, v) +#define SET_UR(b, v) SET_GLOBAL_FIELD(b, TLBTR0, UR, v) +#define SET_UW(b, v) SET_GLOBAL_FIELD(b, TLBTR0, UW, v) +#define SET_XN(b, v) SET_GLOBAL_FIELD(b, TLBTR0, XN, v) +#define SET_NSDESC(b, v) SET_GLOBAL_FIELD(b, TLBTR0, NSDESC, v) +#define SET_ISH(b, v) SET_GLOBAL_FIELD(b, TLBTR0, ISH, v) +#define SET_SH(b, v) SET_GLOBAL_FIELD(b, TLBTR0, SH, v) +#define SET_MT(b, v) SET_GLOBAL_FIELD(b, TLBTR0, MT, v) +#define SET_DPSIZR(b, v) SET_GLOBAL_FIELD(b, TLBTR0, DPSIZR, v) +#define SET_DPSIZC(b, v) SET_GLOBAL_FIELD(b, TLBTR0, DPSIZC, v) + + +/* TLBTR1 */ +#define SET_TLBTR1_VMID(b, v) SET_GLOBAL_FIELD(b, TLBTR1, TLBTR1_VMID, v) +#define SET_TLBTR1_PA(b, v) SET_GLOBAL_FIELD(b, TLBTR1, TLBTR1_PA, v) + + +/* TLBTR2 */ +#define SET_TLBTR2_ASID(b, v) SET_GLOBAL_FIELD(b, TLBTR2, TLBTR2_ASID, v) +#define SET_TLBTR2_V(b, v) SET_GLOBAL_FIELD(b, TLBTR2, TLBTR2_V, v) +#define SET_TLBTR2_NSTID(b, v) SET_GLOBAL_FIELD(b, TLBTR2, TLBTR2_NSTID, v) +#define SET_TLBTR2_NV(b, v) SET_GLOBAL_FIELD(b, TLBTR2, TLBTR2_NV, v) +#define SET_TLBTR2_VA(b, v) SET_GLOBAL_FIELD(b, TLBTR2, TLBTR2_VA, v) + + +/* Global Field Getters */ +/* CBACR_N */ +#define GET_RWVMID(b, n) GET_GLOBAL_FIELD(b, (n<<2)|(CBACR_N), RWVMID) +#define GET_RWE(b, n) GET_GLOBAL_FIELD(b, (n<<2)|(CBACR_N), RWE) +#define GET_RWGE(b, n) GET_GLOBAL_FIELD(b, (n<<2)|(CBACR_N), RWGE) +#define GET_CBVMID(b, n) GET_GLOBAL_FIELD(b, (n<<2)|(CBACR_N), CBVMID) +#define GET_IRPTNDX(b, n) GET_GLOBAL_FIELD(b, (n<<2)|(CBACR_N), IRPTNDX) + + +/* M2VCBR_N */ +#define GET_VMID(b, n) GET_GLOBAL_FIELD(b, (n<<2)|(M2VCBR_N), VMID) +#define GET_CBNDX(b, n) GET_GLOBAL_FIELD(b, (n<<2)|(M2VCBR_N), CBNDX) +#define GET_BYPASSD(b, n) GET_GLOBAL_FIELD(b, (n<<2)|(M2VCBR_N), BYPASSD) +#define GET_BPRCOSH(b, n) GET_GLOBAL_FIELD(b, (n<<2)|(M2VCBR_N), BPRCOSH) +#define GET_BPRCISH(b, n) GET_GLOBAL_FIELD(b, (n<<2)|(M2VCBR_N), BPRCISH) +#define GET_BPRCNSH(b, n) GET_GLOBAL_FIELD(b, (n<<2)|(M2VCBR_N), BPRCNSH) +#define GET_BPSHCFG(b, n) GET_GLOBAL_FIELD(b, (n<<2)|(M2VCBR_N), BPSHCFG) +#define GET_NSCFG(b, n) GET_GLOBAL_FIELD(b, (n<<2)|(M2VCBR_N), NSCFG) +#define GET_BPMTCFG(b, n) GET_GLOBAL_FIELD(b, (n<<2)|(M2VCBR_N), BPMTCFG) +#define GET_BPMEMTYPE(b, n) GET_GLOBAL_FIELD(b, (n<<2)|(M2VCBR_N), BPMEMTYPE) + + +/* CR */ +#define GET_RPUE(b) GET_GLOBAL_FIELD(b, CR, RPUE) +#define GET_RPUERE(b) GET_GLOBAL_FIELD(b, CR, RPUERE) +#define GET_RPUEIE(b) GET_GLOBAL_FIELD(b, CR, RPUEIE) +#define GET_DCDEE(b) GET_GLOBAL_FIELD(b, CR, DCDEE) +#define GET_CLIENTPD(b) GET_GLOBAL_FIELD(b, CR, CLIENTPD) +#define GET_STALLD(b) GET_GLOBAL_FIELD(b, CR, STALLD) +#define GET_TLBLKCRWE(b) GET_GLOBAL_FIELD(b, CR, TLBLKCRWE) +#define GET_CR_TLBIALLCFG(b) GET_GLOBAL_FIELD(b, CR, CR_TLBIALLCFG) +#define GET_TLBIVMIDCFG(b) GET_GLOBAL_FIELD(b, CR, TLBIVMIDCFG) +#define GET_CR_HUME(b) GET_GLOBAL_FIELD(b, CR, CR_HUME) + + +/* ESR */ +#define GET_CFG(b) GET_GLOBAL_FIELD(b, ESR, CFG) +#define GET_BYPASS(b) GET_GLOBAL_FIELD(b, ESR, BYPASS) +#define GET_ESR_MULTI(b) GET_GLOBAL_FIELD(b, ESR, ESR_MULTI) + + +/* ESYNR0 */ +#define GET_ESYNR0_AMID(b) GET_GLOBAL_FIELD(b, ESYNR0, ESYNR0_AMID) +#define GET_ESYNR0_APID(b) GET_GLOBAL_FIELD(b, ESYNR0, ESYNR0_APID) +#define GET_ESYNR0_ABID(b) GET_GLOBAL_FIELD(b, ESYNR0, ESYNR0_ABID) +#define GET_ESYNR0_AVMID(b) GET_GLOBAL_FIELD(b, ESYNR0, ESYNR0_AVMID) +#define GET_ESYNR0_ATID(b) GET_GLOBAL_FIELD(b, ESYNR0, ESYNR0_ATID) + + +/* ESYNR1 */ +#define GET_ESYNR1_AMEMTYPE(b) GET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_AMEMTYPE) +#define GET_ESYNR1_ASHARED(b) GET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_ASHARED) +#define GET_ESYNR1_AINNERSHARED(b) \ + GET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_AINNERSHARED) +#define GET_ESYNR1_APRIV(b) GET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_APRIV) +#define GET_ESYNR1_APROTNS(b) GET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_APROTNS) +#define GET_ESYNR1_AINST(b) GET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_AINST) +#define GET_ESYNR1_AWRITE(b) GET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_AWRITE) +#define GET_ESYNR1_ABURST(b) GET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_ABURST) +#define GET_ESYNR1_ALEN(b) GET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_ALEN) +#define GET_ESYNR1_ASIZE(b) GET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_ASIZE) +#define GET_ESYNR1_ALOCK(b) GET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_ALOCK) +#define GET_ESYNR1_AOOO(b) GET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_AOOO) +#define GET_ESYNR1_AFULL(b) GET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_AFULL) +#define GET_ESYNR1_AC(b) GET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_AC) +#define GET_ESYNR1_DCD(b) GET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_DCD) + + +/* IDR */ +#define GET_NM2VCBMT(b) GET_GLOBAL_FIELD(b, IDR, NM2VCBMT) +#define GET_HTW(b) GET_GLOBAL_FIELD(b, IDR, HTW) +#define GET_HUM(b) GET_GLOBAL_FIELD(b, IDR, HUM) +#define GET_TLBSIZE(b) GET_GLOBAL_FIELD(b, IDR, TLBSIZE) +#define GET_NCB(b) GET_GLOBAL_FIELD(b, IDR, NCB) +#define GET_NIRPT(b) GET_GLOBAL_FIELD(b, IDR, NIRPT) + + +/* REV */ +#define GET_MAJOR(b) GET_GLOBAL_FIELD(b, REV, MAJOR) +#define GET_MINOR(b) GET_GLOBAL_FIELD(b, REV, MINOR) + + +/* TESTBUSCR */ +#define GET_TBE(b) GET_GLOBAL_FIELD(b, TESTBUSCR, TBE) +#define GET_SPDMBE(b) GET_GLOBAL_FIELD(b, TESTBUSCR, SPDMBE) +#define GET_WGSEL(b) GET_GLOBAL_FIELD(b, TESTBUSCR, WGSEL) +#define GET_TBLSEL(b) GET_GLOBAL_FIELD(b, TESTBUSCR, TBLSEL) +#define GET_TBHSEL(b) GET_GLOBAL_FIELD(b, TESTBUSCR, TBHSEL) +#define GET_SPDM0SEL(b) GET_GLOBAL_FIELD(b, TESTBUSCR, SPDM0SEL) +#define GET_SPDM1SEL(b) GET_GLOBAL_FIELD(b, TESTBUSCR, SPDM1SEL) +#define GET_SPDM2SEL(b) GET_GLOBAL_FIELD(b, TESTBUSCR, SPDM2SEL) +#define GET_SPDM3SEL(b) GET_GLOBAL_FIELD(b, TESTBUSCR, SPDM3SEL) + + +/* TLBIVMID */ +#define GET_TLBIVMID_VMID(b) GET_GLOBAL_FIELD(b, TLBIVMID, TLBIVMID_VMID) + + +/* TLBTR0 */ +#define GET_PR(b) GET_GLOBAL_FIELD(b, TLBTR0, PR) +#define GET_PW(b) GET_GLOBAL_FIELD(b, TLBTR0, PW) +#define GET_UR(b) GET_GLOBAL_FIELD(b, TLBTR0, UR) +#define GET_UW(b) GET_GLOBAL_FIELD(b, TLBTR0, UW) +#define GET_XN(b) GET_GLOBAL_FIELD(b, TLBTR0, XN) +#define GET_NSDESC(b) GET_GLOBAL_FIELD(b, TLBTR0, NSDESC) +#define GET_ISH(b) GET_GLOBAL_FIELD(b, TLBTR0, ISH) +#define GET_SH(b) GET_GLOBAL_FIELD(b, TLBTR0, SH) +#define GET_MT(b) GET_GLOBAL_FIELD(b, TLBTR0, MT) +#define GET_DPSIZR(b) GET_GLOBAL_FIELD(b, TLBTR0, DPSIZR) +#define GET_DPSIZC(b) GET_GLOBAL_FIELD(b, TLBTR0, DPSIZC) + + +/* TLBTR1 */ +#define GET_TLBTR1_VMID(b) GET_GLOBAL_FIELD(b, TLBTR1, TLBTR1_VMID) +#define GET_TLBTR1_PA(b) GET_GLOBAL_FIELD(b, TLBTR1, TLBTR1_PA) + + +/* TLBTR2 */ +#define GET_TLBTR2_ASID(b) GET_GLOBAL_FIELD(b, TLBTR2, TLBTR2_ASID) +#define GET_TLBTR2_V(b) GET_GLOBAL_FIELD(b, TLBTR2, TLBTR2_V) +#define GET_TLBTR2_NSTID(b) GET_GLOBAL_FIELD(b, TLBTR2, TLBTR2_NSTID) +#define GET_TLBTR2_NV(b) GET_GLOBAL_FIELD(b, TLBTR2, TLBTR2_NV) +#define GET_TLBTR2_VA(b) GET_GLOBAL_FIELD(b, TLBTR2, TLBTR2_VA) + + +/* Context Register setters / getters */ +/* Context Register setters */ +/* ACTLR */ +#define SET_CFERE(b, c, v) SET_CONTEXT_FIELD(b, c, ACTLR, CFERE, v) +#define SET_CFEIE(b, c, v) SET_CONTEXT_FIELD(b, c, ACTLR, CFEIE, v) +#define SET_PTSHCFG(b, c, v) SET_CONTEXT_FIELD(b, c, ACTLR, PTSHCFG, v) +#define SET_RCOSH(b, c, v) SET_CONTEXT_FIELD(b, c, ACTLR, RCOSH, v) +#define SET_RCISH(b, c, v) SET_CONTEXT_FIELD(b, c, ACTLR, RCISH, v) +#define SET_RCNSH(b, c, v) SET_CONTEXT_FIELD(b, c, ACTLR, RCNSH, v) +#define SET_PRIVCFG(b, c, v) SET_CONTEXT_FIELD(b, c, ACTLR, PRIVCFG, v) +#define SET_DNA(b, c, v) SET_CONTEXT_FIELD(b, c, ACTLR, DNA, v) +#define SET_DNLV2PA(b, c, v) SET_CONTEXT_FIELD(b, c, ACTLR, DNLV2PA, v) +#define SET_TLBMCFG(b, c, v) SET_CONTEXT_FIELD(b, c, ACTLR, TLBMCFG, v) +#define SET_CFCFG(b, c, v) SET_CONTEXT_FIELD(b, c, ACTLR, CFCFG, v) +#define SET_TIPCF(b, c, v) SET_CONTEXT_FIELD(b, c, ACTLR, TIPCF, v) +#define SET_V2PCFG(b, c, v) SET_CONTEXT_FIELD(b, c, ACTLR, V2PCFG, v) +#define SET_HUME(b, c, v) SET_CONTEXT_FIELD(b, c, ACTLR, HUME, v) +#define SET_PTMTCFG(b, c, v) SET_CONTEXT_FIELD(b, c, ACTLR, PTMTCFG, v) +#define SET_PTMEMTYPE(b, c, v) SET_CONTEXT_FIELD(b, c, ACTLR, PTMEMTYPE, v) + + +/* BFBCR */ +#define SET_BFBDFE(b, c, v) SET_CONTEXT_FIELD(b, c, BFBCR, BFBDFE, v) +#define SET_BFBSFE(b, c, v) SET_CONTEXT_FIELD(b, c, BFBCR, BFBSFE, v) +#define SET_SFVS(b, c, v) SET_CONTEXT_FIELD(b, c, BFBCR, SFVS, v) +#define SET_FLVIC(b, c, v) SET_CONTEXT_FIELD(b, c, BFBCR, FLVIC, v) +#define SET_SLVIC(b, c, v) SET_CONTEXT_FIELD(b, c, BFBCR, SLVIC, v) + + +/* CONTEXTIDR */ +#define SET_CONTEXTIDR_ASID(b, c, v) \ + SET_CONTEXT_FIELD(b, c, CONTEXTIDR, CONTEXTIDR_ASID, v) +#define SET_CONTEXTIDR_PROCID(b, c, v) \ + SET_CONTEXT_FIELD(b, c, CONTEXTIDR, PROCID, v) + + +/* FSR */ +#define SET_TF(b, c, v) SET_CONTEXT_FIELD(b, c, FSR, TF, v) +#define SET_AFF(b, c, v) SET_CONTEXT_FIELD(b, c, FSR, AFF, v) +#define SET_APF(b, c, v) SET_CONTEXT_FIELD(b, c, FSR, APF, v) +#define SET_TLBMF(b, c, v) SET_CONTEXT_FIELD(b, c, FSR, TLBMF, v) +#define SET_HTWDEEF(b, c, v) SET_CONTEXT_FIELD(b, c, FSR, HTWDEEF, v) +#define SET_HTWSEEF(b, c, v) SET_CONTEXT_FIELD(b, c, FSR, HTWSEEF, v) +#define SET_MHF(b, c, v) SET_CONTEXT_FIELD(b, c, FSR, MHF, v) +#define SET_SL(b, c, v) SET_CONTEXT_FIELD(b, c, FSR, SL, v) +#define SET_SS(b, c, v) SET_CONTEXT_FIELD(b, c, FSR, SS, v) +#define SET_MULTI(b, c, v) SET_CONTEXT_FIELD(b, c, FSR, MULTI, v) + + +/* FSYNR0 */ +#define SET_AMID(b, c, v) SET_CONTEXT_FIELD(b, c, FSYNR0, AMID, v) +#define SET_APID(b, c, v) SET_CONTEXT_FIELD(b, c, FSYNR0, APID, v) +#define SET_ABID(b, c, v) SET_CONTEXT_FIELD(b, c, FSYNR0, ABID, v) +#define SET_ATID(b, c, v) SET_CONTEXT_FIELD(b, c, FSYNR0, ATID, v) + + +/* FSYNR1 */ +#define SET_AMEMTYPE(b, c, v) SET_CONTEXT_FIELD(b, c, FSYNR1, AMEMTYPE, v) +#define SET_ASHARED(b, c, v) SET_CONTEXT_FIELD(b, c, FSYNR1, ASHARED, v) +#define SET_AINNERSHARED(b, c, v) \ + SET_CONTEXT_FIELD(b, c, FSYNR1, AINNERSHARED, v) +#define SET_APRIV(b, c, v) SET_CONTEXT_FIELD(b, c, FSYNR1, APRIV, v) +#define SET_APROTNS(b, c, v) SET_CONTEXT_FIELD(b, c, FSYNR1, APROTNS, v) +#define SET_AINST(b, c, v) SET_CONTEXT_FIELD(b, c, FSYNR1, AINST, v) +#define SET_AWRITE(b, c, v) SET_CONTEXT_FIELD(b, c, FSYNR1, AWRITE, v) +#define SET_ABURST(b, c, v) SET_CONTEXT_FIELD(b, c, FSYNR1, ABURST, v) +#define SET_ALEN(b, c, v) SET_CONTEXT_FIELD(b, c, FSYNR1, ALEN, v) +#define SET_FSYNR1_ASIZE(b, c, v) \ + SET_CONTEXT_FIELD(b, c, FSYNR1, FSYNR1_ASIZE, v) +#define SET_ALOCK(b, c, v) SET_CONTEXT_FIELD(b, c, FSYNR1, ALOCK, v) +#define SET_AFULL(b, c, v) SET_CONTEXT_FIELD(b, c, FSYNR1, AFULL, v) + + +/* NMRR */ +#define SET_ICPC0(b, c, v) SET_CONTEXT_FIELD(b, c, NMRR, ICPC0, v) +#define SET_ICPC1(b, c, v) SET_CONTEXT_FIELD(b, c, NMRR, ICPC1, v) +#define SET_ICPC2(b, c, v) SET_CONTEXT_FIELD(b, c, NMRR, ICPC2, v) +#define SET_ICPC3(b, c, v) SET_CONTEXT_FIELD(b, c, NMRR, ICPC3, v) +#define SET_ICPC4(b, c, v) SET_CONTEXT_FIELD(b, c, NMRR, ICPC4, v) +#define SET_ICPC5(b, c, v) SET_CONTEXT_FIELD(b, c, NMRR, ICPC5, v) +#define SET_ICPC6(b, c, v) SET_CONTEXT_FIELD(b, c, NMRR, ICPC6, v) +#define SET_ICPC7(b, c, v) SET_CONTEXT_FIELD(b, c, NMRR, ICPC7, v) +#define SET_OCPC0(b, c, v) SET_CONTEXT_FIELD(b, c, NMRR, OCPC0, v) +#define SET_OCPC1(b, c, v) SET_CONTEXT_FIELD(b, c, NMRR, OCPC1, v) +#define SET_OCPC2(b, c, v) SET_CONTEXT_FIELD(b, c, NMRR, OCPC2, v) +#define SET_OCPC3(b, c, v) SET_CONTEXT_FIELD(b, c, NMRR, OCPC3, v) +#define SET_OCPC4(b, c, v) SET_CONTEXT_FIELD(b, c, NMRR, OCPC4, v) +#define SET_OCPC5(b, c, v) SET_CONTEXT_FIELD(b, c, NMRR, OCPC5, v) +#define SET_OCPC6(b, c, v) SET_CONTEXT_FIELD(b, c, NMRR, OCPC6, v) +#define SET_OCPC7(b, c, v) SET_CONTEXT_FIELD(b, c, NMRR, OCPC7, v) + + +/* PAR */ +#define SET_FAULT(b, c, v) SET_CONTEXT_FIELD(b, c, PAR, FAULT, v) + +#define SET_FAULT_TF(b, c, v) SET_CONTEXT_FIELD(b, c, PAR, FAULT_TF, v) +#define SET_FAULT_AFF(b, c, v) SET_CONTEXT_FIELD(b, c, PAR, FAULT_AFF, v) +#define SET_FAULT_APF(b, c, v) SET_CONTEXT_FIELD(b, c, PAR, FAULT_APF, v) +#define SET_FAULT_TLBMF(b, c, v) SET_CONTEXT_FIELD(b, c, PAR, FAULT_TLBMF, v) +#define SET_FAULT_HTWDEEF(b, c, v) \ + SET_CONTEXT_FIELD(b, c, PAR, FAULT_HTWDEEF, v) +#define SET_FAULT_HTWSEEF(b, c, v) \ + SET_CONTEXT_FIELD(b, c, PAR, FAULT_HTWSEEF, v) +#define SET_FAULT_MHF(b, c, v) SET_CONTEXT_FIELD(b, c, PAR, FAULT_MHF, v) +#define SET_FAULT_SL(b, c, v) SET_CONTEXT_FIELD(b, c, PAR, FAULT_SL, v) +#define SET_FAULT_SS(b, c, v) SET_CONTEXT_FIELD(b, c, PAR, FAULT_SS, v) + +#define SET_NOFAULT_SS(b, c, v) SET_CONTEXT_FIELD(b, c, PAR, NOFAULT_SS, v) +#define SET_NOFAULT_MT(b, c, v) SET_CONTEXT_FIELD(b, c, PAR, NOFAULT_MT, v) +#define SET_NOFAULT_SH(b, c, v) SET_CONTEXT_FIELD(b, c, PAR, NOFAULT_SH, v) +#define SET_NOFAULT_NS(b, c, v) SET_CONTEXT_FIELD(b, c, PAR, NOFAULT_NS, v) +#define SET_NOFAULT_NOS(b, c, v) SET_CONTEXT_FIELD(b, c, PAR, NOFAULT_NOS, v) +#define SET_NPFAULT_PA(b, c, v) SET_CONTEXT_FIELD(b, c, PAR, NPFAULT_PA, v) + + +/* PRRR */ +#define SET_MTC0(b, c, v) SET_CONTEXT_FIELD(b, c, PRRR, MTC0, v) +#define SET_MTC1(b, c, v) SET_CONTEXT_FIELD(b, c, PRRR, MTC1, v) +#define SET_MTC2(b, c, v) SET_CONTEXT_FIELD(b, c, PRRR, MTC2, v) +#define SET_MTC3(b, c, v) SET_CONTEXT_FIELD(b, c, PRRR, MTC3, v) +#define SET_MTC4(b, c, v) SET_CONTEXT_FIELD(b, c, PRRR, MTC4, v) +#define SET_MTC5(b, c, v) SET_CONTEXT_FIELD(b, c, PRRR, MTC5, v) +#define SET_MTC6(b, c, v) SET_CONTEXT_FIELD(b, c, PRRR, MTC6, v) +#define SET_MTC7(b, c, v) SET_CONTEXT_FIELD(b, c, PRRR, MTC7, v) +#define SET_SHDSH0(b, c, v) SET_CONTEXT_FIELD(b, c, PRRR, SHDSH0, v) +#define SET_SHDSH1(b, c, v) SET_CONTEXT_FIELD(b, c, PRRR, SHDSH1, v) +#define SET_SHNMSH0(b, c, v) SET_CONTEXT_FIELD(b, c, PRRR, SHNMSH0, v) +#define SET_SHNMSH1(b, c, v) SET_CONTEXT_FIELD(b, c, PRRR, SHNMSH1, v) +#define SET_NOS0(b, c, v) SET_CONTEXT_FIELD(b, c, PRRR, NOS0, v) +#define SET_NOS1(b, c, v) SET_CONTEXT_FIELD(b, c, PRRR, NOS1, v) +#define SET_NOS2(b, c, v) SET_CONTEXT_FIELD(b, c, PRRR, NOS2, v) +#define SET_NOS3(b, c, v) SET_CONTEXT_FIELD(b, c, PRRR, NOS3, v) +#define SET_NOS4(b, c, v) SET_CONTEXT_FIELD(b, c, PRRR, NOS4, v) +#define SET_NOS5(b, c, v) SET_CONTEXT_FIELD(b, c, PRRR, NOS5, v) +#define SET_NOS6(b, c, v) SET_CONTEXT_FIELD(b, c, PRRR, NOS6, v) +#define SET_NOS7(b, c, v) SET_CONTEXT_FIELD(b, c, PRRR, NOS7, v) + + +/* RESUME */ +#define SET_TNR(b, c, v) SET_CONTEXT_FIELD(b, c, RESUME, TNR, v) + + +/* SCTLR */ +#define SET_M(b, c, v) SET_CONTEXT_FIELD(b, c, SCTLR, M, v) +#define SET_TRE(b, c, v) SET_CONTEXT_FIELD(b, c, SCTLR, TRE, v) +#define SET_AFE(b, c, v) SET_CONTEXT_FIELD(b, c, SCTLR, AFE, v) +#define SET_HAF(b, c, v) SET_CONTEXT_FIELD(b, c, SCTLR, HAF, v) +#define SET_BE(b, c, v) SET_CONTEXT_FIELD(b, c, SCTLR, BE, v) +#define SET_AFFD(b, c, v) SET_CONTEXT_FIELD(b, c, SCTLR, AFFD, v) + + +/* TLBLKCR */ +#define SET_LKE(b, c, v) SET_CONTEXT_FIELD(b, c, TLBLKCR, LKE, v) +#define SET_TLBLKCR_TLBIALLCFG(b, c, v) \ + SET_CONTEXT_FIELD(b, c, TLBLKCR, TLBLCKR_TLBIALLCFG, v) +#define SET_TLBIASIDCFG(b, c, v) \ + SET_CONTEXT_FIELD(b, c, TLBLKCR, TLBIASIDCFG, v) +#define SET_TLBIVAACFG(b, c, v) SET_CONTEXT_FIELD(b, c, TLBLKCR, TLBIVAACFG, v) +#define SET_FLOOR(b, c, v) SET_CONTEXT_FIELD(b, c, TLBLKCR, FLOOR, v) +#define SET_VICTIM(b, c, v) SET_CONTEXT_FIELD(b, c, TLBLKCR, VICTIM, v) + + +/* TTBCR */ +#define SET_N(b, c, v) SET_CONTEXT_FIELD(b, c, TTBCR, N, v) +#define SET_PD0(b, c, v) SET_CONTEXT_FIELD(b, c, TTBCR, PD0, v) +#define SET_PD1(b, c, v) SET_CONTEXT_FIELD(b, c, TTBCR, PD1, v) + + +/* TTBR0 */ +#define SET_TTBR0_IRGNH(b, c, v) SET_CONTEXT_FIELD(b, c, TTBR0, TTBR0_IRGNH, v) +#define SET_TTBR0_SH(b, c, v) SET_CONTEXT_FIELD(b, c, TTBR0, TTBR0_SH, v) +#define SET_TTBR0_ORGN(b, c, v) SET_CONTEXT_FIELD(b, c, TTBR0, TTBR0_ORGN, v) +#define SET_TTBR0_NOS(b, c, v) SET_CONTEXT_FIELD(b, c, TTBR0, TTBR0_NOS, v) +#define SET_TTBR0_IRGNL(b, c, v) SET_CONTEXT_FIELD(b, c, TTBR0, TTBR0_IRGNL, v) +#define SET_TTBR0_PA(b, c, v) SET_CONTEXT_FIELD(b, c, TTBR0, TTBR0_PA, v) + + +/* TTBR1 */ +#define SET_TTBR1_IRGNH(b, c, v) SET_CONTEXT_FIELD(b, c, TTBR1, TTBR1_IRGNH, v) +#define SET_TTBR1_SH(b, c, v) SET_CONTEXT_FIELD(b, c, TTBR1, TTBR1_SH, v) +#define SET_TTBR1_ORGN(b, c, v) SET_CONTEXT_FIELD(b, c, TTBR1, TTBR1_ORGN, v) +#define SET_TTBR1_NOS(b, c, v) SET_CONTEXT_FIELD(b, c, TTBR1, TTBR1_NOS, v) +#define SET_TTBR1_IRGNL(b, c, v) SET_CONTEXT_FIELD(b, c, TTBR1, TTBR1_IRGNL, v) +#define SET_TTBR1_PA(b, c, v) SET_CONTEXT_FIELD(b, c, TTBR1, TTBR1_PA, v) + + +/* V2PSR */ +#define SET_HIT(b, c, v) SET_CONTEXT_FIELD(b, c, V2PSR, HIT, v) +#define SET_INDEX(b, c, v) SET_CONTEXT_FIELD(b, c, V2PSR, INDEX, v) + + +/* Context Register getters */ +/* ACTLR */ +#define GET_CFERE(b, c) GET_CONTEXT_FIELD(b, c, ACTLR, CFERE) +#define GET_CFEIE(b, c) GET_CONTEXT_FIELD(b, c, ACTLR, CFEIE) +#define GET_PTSHCFG(b, c) GET_CONTEXT_FIELD(b, c, ACTLR, PTSHCFG) +#define GET_RCOSH(b, c) GET_CONTEXT_FIELD(b, c, ACTLR, RCOSH) +#define GET_RCISH(b, c) GET_CONTEXT_FIELD(b, c, ACTLR, RCISH) +#define GET_RCNSH(b, c) GET_CONTEXT_FIELD(b, c, ACTLR, RCNSH) +#define GET_PRIVCFG(b, c) GET_CONTEXT_FIELD(b, c, ACTLR, PRIVCFG) +#define GET_DNA(b, c) GET_CONTEXT_FIELD(b, c, ACTLR, DNA) +#define GET_DNLV2PA(b, c) GET_CONTEXT_FIELD(b, c, ACTLR, DNLV2PA) +#define GET_TLBMCFG(b, c) GET_CONTEXT_FIELD(b, c, ACTLR, TLBMCFG) +#define GET_CFCFG(b, c) GET_CONTEXT_FIELD(b, c, ACTLR, CFCFG) +#define GET_TIPCF(b, c) GET_CONTEXT_FIELD(b, c, ACTLR, TIPCF) +#define GET_V2PCFG(b, c) GET_CONTEXT_FIELD(b, c, ACTLR, V2PCFG) +#define GET_HUME(b, c) GET_CONTEXT_FIELD(b, c, ACTLR, HUME) +#define GET_PTMTCFG(b, c) GET_CONTEXT_FIELD(b, c, ACTLR, PTMTCFG) +#define GET_PTMEMTYPE(b, c) GET_CONTEXT_FIELD(b, c, ACTLR, PTMEMTYPE) + +/* BFBCR */ +#define GET_BFBDFE(b, c) GET_CONTEXT_FIELD(b, c, BFBCR, BFBDFE) +#define GET_BFBSFE(b, c) GET_CONTEXT_FIELD(b, c, BFBCR, BFBSFE) +#define GET_SFVS(b, c) GET_CONTEXT_FIELD(b, c, BFBCR, SFVS) +#define GET_FLVIC(b, c) GET_CONTEXT_FIELD(b, c, BFBCR, FLVIC) +#define GET_SLVIC(b, c) GET_CONTEXT_FIELD(b, c, BFBCR, SLVIC) + + +/* CONTEXTIDR */ +#define GET_CONTEXTIDR_ASID(b, c) \ + GET_CONTEXT_FIELD(b, c, CONTEXTIDR, CONTEXTIDR_ASID) +#define GET_CONTEXTIDR_PROCID(b, c) GET_CONTEXT_FIELD(b, c, CONTEXTIDR, PROCID) + + +/* FSR */ +#define GET_TF(b, c) GET_CONTEXT_FIELD(b, c, FSR, TF) +#define GET_AFF(b, c) GET_CONTEXT_FIELD(b, c, FSR, AFF) +#define GET_APF(b, c) GET_CONTEXT_FIELD(b, c, FSR, APF) +#define GET_TLBMF(b, c) GET_CONTEXT_FIELD(b, c, FSR, TLBMF) +#define GET_HTWDEEF(b, c) GET_CONTEXT_FIELD(b, c, FSR, HTWDEEF) +#define GET_HTWSEEF(b, c) GET_CONTEXT_FIELD(b, c, FSR, HTWSEEF) +#define GET_MHF(b, c) GET_CONTEXT_FIELD(b, c, FSR, MHF) +#define GET_SL(b, c) GET_CONTEXT_FIELD(b, c, FSR, SL) +#define GET_SS(b, c) GET_CONTEXT_FIELD(b, c, FSR, SS) +#define GET_MULTI(b, c) GET_CONTEXT_FIELD(b, c, FSR, MULTI) + + +/* FSYNR0 */ +#define GET_AMID(b, c) GET_CONTEXT_FIELD(b, c, FSYNR0, AMID) +#define GET_APID(b, c) GET_CONTEXT_FIELD(b, c, FSYNR0, APID) +#define GET_ABID(b, c) GET_CONTEXT_FIELD(b, c, FSYNR0, ABID) +#define GET_ATID(b, c) GET_CONTEXT_FIELD(b, c, FSYNR0, ATID) + + +/* FSYNR1 */ +#define GET_AMEMTYPE(b, c) GET_CONTEXT_FIELD(b, c, FSYNR1, AMEMTYPE) +#define GET_ASHARED(b, c) GET_CONTEXT_FIELD(b, c, FSYNR1, ASHARED) +#define GET_AINNERSHARED(b, c) GET_CONTEXT_FIELD(b, c, FSYNR1, AINNERSHARED) +#define GET_APRIV(b, c) GET_CONTEXT_FIELD(b, c, FSYNR1, APRIV) +#define GET_APROTNS(b, c) GET_CONTEXT_FIELD(b, c, FSYNR1, APROTNS) +#define GET_AINST(b, c) GET_CONTEXT_FIELD(b, c, FSYNR1, AINST) +#define GET_AWRITE(b, c) GET_CONTEXT_FIELD(b, c, FSYNR1, AWRITE) +#define GET_ABURST(b, c) GET_CONTEXT_FIELD(b, c, FSYNR1, ABURST) +#define GET_ALEN(b, c) GET_CONTEXT_FIELD(b, c, FSYNR1, ALEN) +#define GET_FSYNR1_ASIZE(b, c) GET_CONTEXT_FIELD(b, c, FSYNR1, FSYNR1_ASIZE) +#define GET_ALOCK(b, c) GET_CONTEXT_FIELD(b, c, FSYNR1, ALOCK) +#define GET_AFULL(b, c) GET_CONTEXT_FIELD(b, c, FSYNR1, AFULL) + + +/* NMRR */ +#define GET_ICPC0(b, c) GET_CONTEXT_FIELD(b, c, NMRR, ICPC0) +#define GET_ICPC1(b, c) GET_CONTEXT_FIELD(b, c, NMRR, ICPC1) +#define GET_ICPC2(b, c) GET_CONTEXT_FIELD(b, c, NMRR, ICPC2) +#define GET_ICPC3(b, c) GET_CONTEXT_FIELD(b, c, NMRR, ICPC3) +#define GET_ICPC4(b, c) GET_CONTEXT_FIELD(b, c, NMRR, ICPC4) +#define GET_ICPC5(b, c) GET_CONTEXT_FIELD(b, c, NMRR, ICPC5) +#define GET_ICPC6(b, c) GET_CONTEXT_FIELD(b, c, NMRR, ICPC6) +#define GET_ICPC7(b, c) GET_CONTEXT_FIELD(b, c, NMRR, ICPC7) +#define GET_OCPC0(b, c) GET_CONTEXT_FIELD(b, c, NMRR, OCPC0) +#define GET_OCPC1(b, c) GET_CONTEXT_FIELD(b, c, NMRR, OCPC1) +#define GET_OCPC2(b, c) GET_CONTEXT_FIELD(b, c, NMRR, OCPC2) +#define GET_OCPC3(b, c) GET_CONTEXT_FIELD(b, c, NMRR, OCPC3) +#define GET_OCPC4(b, c) GET_CONTEXT_FIELD(b, c, NMRR, OCPC4) +#define GET_OCPC5(b, c) GET_CONTEXT_FIELD(b, c, NMRR, OCPC5) +#define GET_OCPC6(b, c) GET_CONTEXT_FIELD(b, c, NMRR, OCPC6) +#define GET_OCPC7(b, c) GET_CONTEXT_FIELD(b, c, NMRR, OCPC7) +#define NMRR_ICP(nmrr, n) (((nmrr) & (3 << ((n) * 2))) >> ((n) * 2)) +#define NMRR_OCP(nmrr, n) (((nmrr) & (3 << ((n) * 2 + 16))) >> \ + ((n) * 2 + 16)) + +/* PAR */ +#define GET_FAULT(b, c) GET_CONTEXT_FIELD(b, c, PAR, FAULT) + +#define GET_FAULT_TF(b, c) GET_CONTEXT_FIELD(b, c, PAR, FAULT_TF) +#define GET_FAULT_AFF(b, c) GET_CONTEXT_FIELD(b, c, PAR, FAULT_AFF) +#define GET_FAULT_APF(b, c) GET_CONTEXT_FIELD(b, c, PAR, FAULT_APF) +#define GET_FAULT_TLBMF(b, c) GET_CONTEXT_FIELD(b, c, PAR, FAULT_TLBMF) +#define GET_FAULT_HTWDEEF(b, c) GET_CONTEXT_FIELD(b, c, PAR, FAULT_HTWDEEF) +#define GET_FAULT_HTWSEEF(b, c) GET_CONTEXT_FIELD(b, c, PAR, FAULT_HTWSEEF) +#define GET_FAULT_MHF(b, c) GET_CONTEXT_FIELD(b, c, PAR, FAULT_MHF) +#define GET_FAULT_SL(b, c) GET_CONTEXT_FIELD(b, c, PAR, FAULT_SL) +#define GET_FAULT_SS(b, c) GET_CONTEXT_FIELD(b, c, PAR, FAULT_SS) + +#define GET_NOFAULT_SS(b, c) GET_CONTEXT_FIELD(b, c, PAR, PAR_NOFAULT_SS) +#define GET_NOFAULT_MT(b, c) GET_CONTEXT_FIELD(b, c, PAR, PAR_NOFAULT_MT) +#define GET_NOFAULT_SH(b, c) GET_CONTEXT_FIELD(b, c, PAR, PAR_NOFAULT_SH) +#define GET_NOFAULT_NS(b, c) GET_CONTEXT_FIELD(b, c, PAR, PAR_NOFAULT_NS) +#define GET_NOFAULT_NOS(b, c) GET_CONTEXT_FIELD(b, c, PAR, PAR_NOFAULT_NOS) +#define GET_NPFAULT_PA(b, c) GET_CONTEXT_FIELD(b, c, PAR, PAR_NPFAULT_PA) + + +/* PRRR */ +#define GET_MTC0(b, c) GET_CONTEXT_FIELD(b, c, PRRR, MTC0) +#define GET_MTC1(b, c) GET_CONTEXT_FIELD(b, c, PRRR, MTC1) +#define GET_MTC2(b, c) GET_CONTEXT_FIELD(b, c, PRRR, MTC2) +#define GET_MTC3(b, c) GET_CONTEXT_FIELD(b, c, PRRR, MTC3) +#define GET_MTC4(b, c) GET_CONTEXT_FIELD(b, c, PRRR, MTC4) +#define GET_MTC5(b, c) GET_CONTEXT_FIELD(b, c, PRRR, MTC5) +#define GET_MTC6(b, c) GET_CONTEXT_FIELD(b, c, PRRR, MTC6) +#define GET_MTC7(b, c) GET_CONTEXT_FIELD(b, c, PRRR, MTC7) +#define GET_SHDSH0(b, c) GET_CONTEXT_FIELD(b, c, PRRR, SHDSH0) +#define GET_SHDSH1(b, c) GET_CONTEXT_FIELD(b, c, PRRR, SHDSH1) +#define GET_SHNMSH0(b, c) GET_CONTEXT_FIELD(b, c, PRRR, SHNMSH0) +#define GET_SHNMSH1(b, c) GET_CONTEXT_FIELD(b, c, PRRR, SHNMSH1) +#define GET_NOS0(b, c) GET_CONTEXT_FIELD(b, c, PRRR, NOS0) +#define GET_NOS1(b, c) GET_CONTEXT_FIELD(b, c, PRRR, NOS1) +#define GET_NOS2(b, c) GET_CONTEXT_FIELD(b, c, PRRR, NOS2) +#define GET_NOS3(b, c) GET_CONTEXT_FIELD(b, c, PRRR, NOS3) +#define GET_NOS4(b, c) GET_CONTEXT_FIELD(b, c, PRRR, NOS4) +#define GET_NOS5(b, c) GET_CONTEXT_FIELD(b, c, PRRR, NOS5) +#define GET_NOS6(b, c) GET_CONTEXT_FIELD(b, c, PRRR, NOS6) +#define GET_NOS7(b, c) GET_CONTEXT_FIELD(b, c, PRRR, NOS7) +#define PRRR_NOS(prrr, n) ((prrr) & (1 << ((n) + 24)) ? 1 : 0) +#define PRRR_MT(prrr, n) ((((prrr) & (3 << ((n) * 2))) >> ((n) * 2))) + + +/* RESUME */ +#define GET_TNR(b, c) GET_CONTEXT_FIELD(b, c, RESUME, TNR) + + +/* SCTLR */ +#define GET_M(b, c) GET_CONTEXT_FIELD(b, c, SCTLR, M) +#define GET_TRE(b, c) GET_CONTEXT_FIELD(b, c, SCTLR, TRE) +#define GET_AFE(b, c) GET_CONTEXT_FIELD(b, c, SCTLR, AFE) +#define GET_HAF(b, c) GET_CONTEXT_FIELD(b, c, SCTLR, HAF) +#define GET_BE(b, c) GET_CONTEXT_FIELD(b, c, SCTLR, BE) +#define GET_AFFD(b, c) GET_CONTEXT_FIELD(b, c, SCTLR, AFFD) + + +/* TLBLKCR */ +#define GET_LKE(b, c) GET_CONTEXT_FIELD(b, c, TLBLKCR, LKE) +#define GET_TLBLCKR_TLBIALLCFG(b, c) \ + GET_CONTEXT_FIELD(b, c, TLBLKCR, TLBLCKR_TLBIALLCFG) +#define GET_TLBIASIDCFG(b, c) GET_CONTEXT_FIELD(b, c, TLBLKCR, TLBIASIDCFG) +#define GET_TLBIVAACFG(b, c) GET_CONTEXT_FIELD(b, c, TLBLKCR, TLBIVAACFG) +#define GET_FLOOR(b, c) GET_CONTEXT_FIELD(b, c, TLBLKCR, FLOOR) +#define GET_VICTIM(b, c) GET_CONTEXT_FIELD(b, c, TLBLKCR, VICTIM) + + +/* TTBCR */ +#define GET_N(b, c) GET_CONTEXT_FIELD(b, c, TTBCR, N) +#define GET_PD0(b, c) GET_CONTEXT_FIELD(b, c, TTBCR, PD0) +#define GET_PD1(b, c) GET_CONTEXT_FIELD(b, c, TTBCR, PD1) + + +/* TTBR0 */ +#define GET_TTBR0_IRGNH(b, c) GET_CONTEXT_FIELD(b, c, TTBR0, TTBR0_IRGNH) +#define GET_TTBR0_SH(b, c) GET_CONTEXT_FIELD(b, c, TTBR0, TTBR0_SH) +#define GET_TTBR0_ORGN(b, c) GET_CONTEXT_FIELD(b, c, TTBR0, TTBR0_ORGN) +#define GET_TTBR0_NOS(b, c) GET_CONTEXT_FIELD(b, c, TTBR0, TTBR0_NOS) +#define GET_TTBR0_IRGNL(b, c) GET_CONTEXT_FIELD(b, c, TTBR0, TTBR0_IRGNL) +#define GET_TTBR0_PA(b, c) GET_CONTEXT_FIELD(b, c, TTBR0, TTBR0_PA) + + +/* TTBR1 */ +#define GET_TTBR1_IRGNH(b, c) GET_CONTEXT_FIELD(b, c, TTBR1, TTBR1_IRGNH) +#define GET_TTBR1_SH(b, c) GET_CONTEXT_FIELD(b, c, TTBR1, TTBR1_SH) +#define GET_TTBR1_ORGN(b, c) GET_CONTEXT_FIELD(b, c, TTBR1, TTBR1_ORGN) +#define GET_TTBR1_NOS(b, c) GET_CONTEXT_FIELD(b, c, TTBR1, TTBR1_NOS) +#define GET_TTBR1_IRGNL(b, c) GET_CONTEXT_FIELD(b, c, TTBR1, TTBR1_IRGNL) +#define GET_TTBR1_PA(b, c) GET_CONTEXT_FIELD(b, c, TTBR1, TTBR1_PA) + + +/* V2PSR */ +#define GET_HIT(b, c) GET_CONTEXT_FIELD(b, c, V2PSR, HIT) +#define GET_INDEX(b, c) GET_CONTEXT_FIELD(b, c, V2PSR, INDEX) + + +/* Global Registers */ +#define M2VCBR_N (0xFF000) +#define CBACR_N (0xFF800) +#define TLBRSW (0xFFE00) +#define TLBTR0 (0xFFE80) +#define TLBTR1 (0xFFE84) +#define TLBTR2 (0xFFE88) +#define TESTBUSCR (0xFFE8C) +#define GLOBAL_TLBIALL (0xFFF00) +#define TLBIVMID (0xFFF04) +#define CR (0xFFF80) +#define EAR (0xFFF84) +#define ESR (0xFFF88) +#define ESRRESTORE (0xFFF8C) +#define ESYNR0 (0xFFF90) +#define ESYNR1 (0xFFF94) +#define REV (0xFFFF4) +#define IDR (0xFFFF8) +#define RPU_ACR (0xFFFFC) + + +/* Context Bank Registers */ +#define SCTLR (0x000) +#define ACTLR (0x004) +#define CONTEXTIDR (0x008) +#define TTBR0 (0x010) +#define TTBR1 (0x014) +#define TTBCR (0x018) +#define PAR (0x01C) +#define FSR (0x020) +#define FSRRESTORE (0x024) +#define FAR (0x028) +#define FSYNR0 (0x02C) +#define FSYNR1 (0x030) +#define PRRR (0x034) +#define NMRR (0x038) +#define TLBLCKR (0x03C) +#define V2PSR (0x040) +#define TLBFLPTER (0x044) +#define TLBSLPTER (0x048) +#define BFBCR (0x04C) +#define CTX_TLBIALL (0x800) +#define TLBIASID (0x804) +#define TLBIVA (0x808) +#define TLBIVAA (0x80C) +#define V2PPR (0x810) +#define V2PPW (0x814) +#define V2PUR (0x818) +#define V2PUW (0x81C) +#define RESUME (0x820) + + +/* Global Register Fields */ +/* CBACRn */ +#define RWVMID (RWVMID_MASK << RWVMID_SHIFT) +#define RWE (RWE_MASK << RWE_SHIFT) +#define RWGE (RWGE_MASK << RWGE_SHIFT) +#define CBVMID (CBVMID_MASK << CBVMID_SHIFT) +#define IRPTNDX (IRPTNDX_MASK << IRPTNDX_SHIFT) + + +/* CR */ +#define RPUE (RPUE_MASK << RPUE_SHIFT) +#define RPUERE (RPUERE_MASK << RPUERE_SHIFT) +#define RPUEIE (RPUEIE_MASK << RPUEIE_SHIFT) +#define DCDEE (DCDEE_MASK << DCDEE_SHIFT) +#define CLIENTPD (CLIENTPD_MASK << CLIENTPD_SHIFT) +#define STALLD (STALLD_MASK << STALLD_SHIFT) +#define TLBLKCRWE (TLBLKCRWE_MASK << TLBLKCRWE_SHIFT) +#define CR_TLBIALLCFG (CR_TLBIALLCFG_MASK << CR_TLBIALLCFG_SHIFT) +#define TLBIVMIDCFG (TLBIVMIDCFG_MASK << TLBIVMIDCFG_SHIFT) +#define CR_HUME (CR_HUME_MASK << CR_HUME_SHIFT) + + +/* ESR */ +#define CFG (CFG_MASK << CFG_SHIFT) +#define BYPASS (BYPASS_MASK << BYPASS_SHIFT) +#define ESR_MULTI (ESR_MULTI_MASK << ESR_MULTI_SHIFT) + + +/* ESYNR0 */ +#define ESYNR0_AMID (ESYNR0_AMID_MASK << ESYNR0_AMID_SHIFT) +#define ESYNR0_APID (ESYNR0_APID_MASK << ESYNR0_APID_SHIFT) +#define ESYNR0_ABID (ESYNR0_ABID_MASK << ESYNR0_ABID_SHIFT) +#define ESYNR0_AVMID (ESYNR0_AVMID_MASK << ESYNR0_AVMID_SHIFT) +#define ESYNR0_ATID (ESYNR0_ATID_MASK << ESYNR0_ATID_SHIFT) + + +/* ESYNR1 */ +#define ESYNR1_AMEMTYPE (ESYNR1_AMEMTYPE_MASK << ESYNR1_AMEMTYPE_SHIFT) +#define ESYNR1_ASHARED (ESYNR1_ASHARED_MASK << ESYNR1_ASHARED_SHIFT) +#define ESYNR1_AINNERSHARED (ESYNR1_AINNERSHARED_MASK<< \ + ESYNR1_AINNERSHARED_SHIFT) +#define ESYNR1_APRIV (ESYNR1_APRIV_MASK << ESYNR1_APRIV_SHIFT) +#define ESYNR1_APROTNS (ESYNR1_APROTNS_MASK << ESYNR1_APROTNS_SHIFT) +#define ESYNR1_AINST (ESYNR1_AINST_MASK << ESYNR1_AINST_SHIFT) +#define ESYNR1_AWRITE (ESYNR1_AWRITE_MASK << ESYNR1_AWRITE_SHIFT) +#define ESYNR1_ABURST (ESYNR1_ABURST_MASK << ESYNR1_ABURST_SHIFT) +#define ESYNR1_ALEN (ESYNR1_ALEN_MASK << ESYNR1_ALEN_SHIFT) +#define ESYNR1_ASIZE (ESYNR1_ASIZE_MASK << ESYNR1_ASIZE_SHIFT) +#define ESYNR1_ALOCK (ESYNR1_ALOCK_MASK << ESYNR1_ALOCK_SHIFT) +#define ESYNR1_AOOO (ESYNR1_AOOO_MASK << ESYNR1_AOOO_SHIFT) +#define ESYNR1_AFULL (ESYNR1_AFULL_MASK << ESYNR1_AFULL_SHIFT) +#define ESYNR1_AC (ESYNR1_AC_MASK << ESYNR1_AC_SHIFT) +#define ESYNR1_DCD (ESYNR1_DCD_MASK << ESYNR1_DCD_SHIFT) + + +/* IDR */ +#define NM2VCBMT (NM2VCBMT_MASK << NM2VCBMT_SHIFT) +#define HTW (HTW_MASK << HTW_SHIFT) +#define HUM (HUM_MASK << HUM_SHIFT) +#define TLBSIZE (TLBSIZE_MASK << TLBSIZE_SHIFT) +#define NCB (NCB_MASK << NCB_SHIFT) +#define NIRPT (NIRPT_MASK << NIRPT_SHIFT) + + +/* M2VCBRn */ +#define VMID (VMID_MASK << VMID_SHIFT) +#define CBNDX (CBNDX_MASK << CBNDX_SHIFT) +#define BYPASSD (BYPASSD_MASK << BYPASSD_SHIFT) +#define BPRCOSH (BPRCOSH_MASK << BPRCOSH_SHIFT) +#define BPRCISH (BPRCISH_MASK << BPRCISH_SHIFT) +#define BPRCNSH (BPRCNSH_MASK << BPRCNSH_SHIFT) +#define BPSHCFG (BPSHCFG_MASK << BPSHCFG_SHIFT) +#define NSCFG (NSCFG_MASK << NSCFG_SHIFT) +#define BPMTCFG (BPMTCFG_MASK << BPMTCFG_SHIFT) +#define BPMEMTYPE (BPMEMTYPE_MASK << BPMEMTYPE_SHIFT) + + +/* REV */ +#define IDR_MINOR (MINOR_MASK << MINOR_SHIFT) +#define IDR_MAJOR (MAJOR_MASK << MAJOR_SHIFT) + + +/* TESTBUSCR */ +#define TBE (TBE_MASK << TBE_SHIFT) +#define SPDMBE (SPDMBE_MASK << SPDMBE_SHIFT) +#define WGSEL (WGSEL_MASK << WGSEL_SHIFT) +#define TBLSEL (TBLSEL_MASK << TBLSEL_SHIFT) +#define TBHSEL (TBHSEL_MASK << TBHSEL_SHIFT) +#define SPDM0SEL (SPDM0SEL_MASK << SPDM0SEL_SHIFT) +#define SPDM1SEL (SPDM1SEL_MASK << SPDM1SEL_SHIFT) +#define SPDM2SEL (SPDM2SEL_MASK << SPDM2SEL_SHIFT) +#define SPDM3SEL (SPDM3SEL_MASK << SPDM3SEL_SHIFT) + + +/* TLBIVMID */ +#define TLBIVMID_VMID (TLBIVMID_VMID_MASK << TLBIVMID_VMID_SHIFT) + + +/* TLBRSW */ +#define TLBRSW_INDEX (TLBRSW_INDEX_MASK << TLBRSW_INDEX_SHIFT) +#define TLBBFBS (TLBBFBS_MASK << TLBBFBS_SHIFT) + + +/* TLBTR0 */ +#define PR (PR_MASK << PR_SHIFT) +#define PW (PW_MASK << PW_SHIFT) +#define UR (UR_MASK << UR_SHIFT) +#define UW (UW_MASK << UW_SHIFT) +#define XN (XN_MASK << XN_SHIFT) +#define NSDESC (NSDESC_MASK << NSDESC_SHIFT) +#define ISH (ISH_MASK << ISH_SHIFT) +#define SH (SH_MASK << SH_SHIFT) +#define MT (MT_MASK << MT_SHIFT) +#define DPSIZR (DPSIZR_MASK << DPSIZR_SHIFT) +#define DPSIZC (DPSIZC_MASK << DPSIZC_SHIFT) + + +/* TLBTR1 */ +#define TLBTR1_VMID (TLBTR1_VMID_MASK << TLBTR1_VMID_SHIFT) +#define TLBTR1_PA (TLBTR1_PA_MASK << TLBTR1_PA_SHIFT) + + +/* TLBTR2 */ +#define TLBTR2_ASID (TLBTR2_ASID_MASK << TLBTR2_ASID_SHIFT) +#define TLBTR2_V (TLBTR2_V_MASK << TLBTR2_V_SHIFT) +#define TLBTR2_NSTID (TLBTR2_NSTID_MASK << TLBTR2_NSTID_SHIFT) +#define TLBTR2_NV (TLBTR2_NV_MASK << TLBTR2_NV_SHIFT) +#define TLBTR2_VA (TLBTR2_VA_MASK << TLBTR2_VA_SHIFT) + + +/* Context Register Fields */ +/* ACTLR */ +#define CFERE (CFERE_MASK << CFERE_SHIFT) +#define CFEIE (CFEIE_MASK << CFEIE_SHIFT) +#define PTSHCFG (PTSHCFG_MASK << PTSHCFG_SHIFT) +#define RCOSH (RCOSH_MASK << RCOSH_SHIFT) +#define RCISH (RCISH_MASK << RCISH_SHIFT) +#define RCNSH (RCNSH_MASK << RCNSH_SHIFT) +#define PRIVCFG (PRIVCFG_MASK << PRIVCFG_SHIFT) +#define DNA (DNA_MASK << DNA_SHIFT) +#define DNLV2PA (DNLV2PA_MASK << DNLV2PA_SHIFT) +#define TLBMCFG (TLBMCFG_MASK << TLBMCFG_SHIFT) +#define CFCFG (CFCFG_MASK << CFCFG_SHIFT) +#define TIPCF (TIPCF_MASK << TIPCF_SHIFT) +#define V2PCFG (V2PCFG_MASK << V2PCFG_SHIFT) +#define HUME (HUME_MASK << HUME_SHIFT) +#define PTMTCFG (PTMTCFG_MASK << PTMTCFG_SHIFT) +#define PTMEMTYPE (PTMEMTYPE_MASK << PTMEMTYPE_SHIFT) + + +/* BFBCR */ +#define BFBDFE (BFBDFE_MASK << BFBDFE_SHIFT) +#define BFBSFE (BFBSFE_MASK << BFBSFE_SHIFT) +#define SFVS (SFVS_MASK << SFVS_SHIFT) +#define FLVIC (FLVIC_MASK << FLVIC_SHIFT) +#define SLVIC (SLVIC_MASK << SLVIC_SHIFT) + + +/* CONTEXTIDR */ +#define CONTEXTIDR_ASID (CONTEXTIDR_ASID_MASK << CONTEXTIDR_ASID_SHIFT) +#define PROCID (PROCID_MASK << PROCID_SHIFT) + + +/* FSR */ +#define TF (TF_MASK << TF_SHIFT) +#define AFF (AFF_MASK << AFF_SHIFT) +#define APF (APF_MASK << APF_SHIFT) +#define TLBMF (TLBMF_MASK << TLBMF_SHIFT) +#define HTWDEEF (HTWDEEF_MASK << HTWDEEF_SHIFT) +#define HTWSEEF (HTWSEEF_MASK << HTWSEEF_SHIFT) +#define MHF (MHF_MASK << MHF_SHIFT) +#define SL (SL_MASK << SL_SHIFT) +#define SS (SS_MASK << SS_SHIFT) +#define MULTI (MULTI_MASK << MULTI_SHIFT) + + +/* FSYNR0 */ +#define AMID (AMID_MASK << AMID_SHIFT) +#define APID (APID_MASK << APID_SHIFT) +#define ABID (ABID_MASK << ABID_SHIFT) +#define ATID (ATID_MASK << ATID_SHIFT) + + +/* FSYNR1 */ +#define AMEMTYPE (AMEMTYPE_MASK << AMEMTYPE_SHIFT) +#define ASHARED (ASHARED_MASK << ASHARED_SHIFT) +#define AINNERSHARED (AINNERSHARED_MASK << AINNERSHARED_SHIFT) +#define APRIV (APRIV_MASK << APRIV_SHIFT) +#define APROTNS (APROTNS_MASK << APROTNS_SHIFT) +#define AINST (AINST_MASK << AINST_SHIFT) +#define AWRITE (AWRITE_MASK << AWRITE_SHIFT) +#define ABURST (ABURST_MASK << ABURST_SHIFT) +#define ALEN (ALEN_MASK << ALEN_SHIFT) +#define FSYNR1_ASIZE (FSYNR1_ASIZE_MASK << FSYNR1_ASIZE_SHIFT) +#define ALOCK (ALOCK_MASK << ALOCK_SHIFT) +#define AFULL (AFULL_MASK << AFULL_SHIFT) + + +/* NMRR */ +#define ICPC0 (ICPC0_MASK << ICPC0_SHIFT) +#define ICPC1 (ICPC1_MASK << ICPC1_SHIFT) +#define ICPC2 (ICPC2_MASK << ICPC2_SHIFT) +#define ICPC3 (ICPC3_MASK << ICPC3_SHIFT) +#define ICPC4 (ICPC4_MASK << ICPC4_SHIFT) +#define ICPC5 (ICPC5_MASK << ICPC5_SHIFT) +#define ICPC6 (ICPC6_MASK << ICPC6_SHIFT) +#define ICPC7 (ICPC7_MASK << ICPC7_SHIFT) +#define OCPC0 (OCPC0_MASK << OCPC0_SHIFT) +#define OCPC1 (OCPC1_MASK << OCPC1_SHIFT) +#define OCPC2 (OCPC2_MASK << OCPC2_SHIFT) +#define OCPC3 (OCPC3_MASK << OCPC3_SHIFT) +#define OCPC4 (OCPC4_MASK << OCPC4_SHIFT) +#define OCPC5 (OCPC5_MASK << OCPC5_SHIFT) +#define OCPC6 (OCPC6_MASK << OCPC6_SHIFT) +#define OCPC7 (OCPC7_MASK << OCPC7_SHIFT) + + +/* PAR */ +#define FAULT (FAULT_MASK << FAULT_SHIFT) +/* If a fault is present, these are the +same as the fault fields in the FAR */ +#define FAULT_TF (FAULT_TF_MASK << FAULT_TF_SHIFT) +#define FAULT_AFF (FAULT_AFF_MASK << FAULT_AFF_SHIFT) +#define FAULT_APF (FAULT_APF_MASK << FAULT_APF_SHIFT) +#define FAULT_TLBMF (FAULT_TLBMF_MASK << FAULT_TLBMF_SHIFT) +#define FAULT_HTWDEEF (FAULT_HTWDEEF_MASK << FAULT_HTWDEEF_SHIFT) +#define FAULT_HTWSEEF (FAULT_HTWSEEF_MASK << FAULT_HTWSEEF_SHIFT) +#define FAULT_MHF (FAULT_MHF_MASK << FAULT_MHF_SHIFT) +#define FAULT_SL (FAULT_SL_MASK << FAULT_SL_SHIFT) +#define FAULT_SS (FAULT_SS_MASK << FAULT_SS_SHIFT) + +/* If NO fault is present, the following fields are in effect */ +/* (FAULT remains as before) */ +#define PAR_NOFAULT_SS (PAR_NOFAULT_SS_MASK << PAR_NOFAULT_SS_SHIFT) +#define PAR_NOFAULT_MT (PAR_NOFAULT_MT_MASK << PAR_NOFAULT_MT_SHIFT) +#define PAR_NOFAULT_SH (PAR_NOFAULT_SH_MASK << PAR_NOFAULT_SH_SHIFT) +#define PAR_NOFAULT_NS (PAR_NOFAULT_NS_MASK << PAR_NOFAULT_NS_SHIFT) +#define PAR_NOFAULT_NOS (PAR_NOFAULT_NOS_MASK << PAR_NOFAULT_NOS_SHIFT) +#define PAR_NPFAULT_PA (PAR_NPFAULT_PA_MASK << PAR_NPFAULT_PA_SHIFT) + + +/* PRRR */ +#define MTC0 (MTC0_MASK << MTC0_SHIFT) +#define MTC1 (MTC1_MASK << MTC1_SHIFT) +#define MTC2 (MTC2_MASK << MTC2_SHIFT) +#define MTC3 (MTC3_MASK << MTC3_SHIFT) +#define MTC4 (MTC4_MASK << MTC4_SHIFT) +#define MTC5 (MTC5_MASK << MTC5_SHIFT) +#define MTC6 (MTC6_MASK << MTC6_SHIFT) +#define MTC7 (MTC7_MASK << MTC7_SHIFT) +#define SHDSH0 (SHDSH0_MASK << SHDSH0_SHIFT) +#define SHDSH1 (SHDSH1_MASK << SHDSH1_SHIFT) +#define SHNMSH0 (SHNMSH0_MASK << SHNMSH0_SHIFT) +#define SHNMSH1 (SHNMSH1_MASK << SHNMSH1_SHIFT) +#define NOS0 (NOS0_MASK << NOS0_SHIFT) +#define NOS1 (NOS1_MASK << NOS1_SHIFT) +#define NOS2 (NOS2_MASK << NOS2_SHIFT) +#define NOS3 (NOS3_MASK << NOS3_SHIFT) +#define NOS4 (NOS4_MASK << NOS4_SHIFT) +#define NOS5 (NOS5_MASK << NOS5_SHIFT) +#define NOS6 (NOS6_MASK << NOS6_SHIFT) +#define NOS7 (NOS7_MASK << NOS7_SHIFT) + + +/* RESUME */ +#define TNR (TNR_MASK << TNR_SHIFT) + + +/* SCTLR */ +#define M (M_MASK << M_SHIFT) +#define TRE (TRE_MASK << TRE_SHIFT) +#define AFE (AFE_MASK << AFE_SHIFT) +#define HAF (HAF_MASK << HAF_SHIFT) +#define BE (BE_MASK << BE_SHIFT) +#define AFFD (AFFD_MASK << AFFD_SHIFT) + + +/* TLBIASID */ +#define TLBIASID_ASID (TLBIASID_ASID_MASK << TLBIASID_ASID_SHIFT) + + +/* TLBIVA */ +#define TLBIVA_ASID (TLBIVA_ASID_MASK << TLBIVA_ASID_SHIFT) +#define TLBIVA_VA (TLBIVA_VA_MASK << TLBIVA_VA_SHIFT) + + +/* TLBIVAA */ +#define TLBIVAA_VA (TLBIVAA_VA_MASK << TLBIVAA_VA_SHIFT) + + +/* TLBLCKR */ +#define LKE (LKE_MASK << LKE_SHIFT) +#define TLBLCKR_TLBIALLCFG (TLBLCKR_TLBIALLCFG_MASK<<TLBLCKR_TLBIALLCFG_SHIFT) +#define TLBIASIDCFG (TLBIASIDCFG_MASK << TLBIASIDCFG_SHIFT) +#define TLBIVAACFG (TLBIVAACFG_MASK << TLBIVAACFG_SHIFT) +#define FLOOR (FLOOR_MASK << FLOOR_SHIFT) +#define VICTIM (VICTIM_MASK << VICTIM_SHIFT) + + +/* TTBCR */ +#define N (N_MASK << N_SHIFT) +#define PD0 (PD0_MASK << PD0_SHIFT) +#define PD1 (PD1_MASK << PD1_SHIFT) + + +/* TTBR0 */ +#define TTBR0_IRGNH (TTBR0_IRGNH_MASK << TTBR0_IRGNH_SHIFT) +#define TTBR0_SH (TTBR0_SH_MASK << TTBR0_SH_SHIFT) +#define TTBR0_ORGN (TTBR0_ORGN_MASK << TTBR0_ORGN_SHIFT) +#define TTBR0_NOS (TTBR0_NOS_MASK << TTBR0_NOS_SHIFT) +#define TTBR0_IRGNL (TTBR0_IRGNL_MASK << TTBR0_IRGNL_SHIFT) +#define TTBR0_PA (TTBR0_PA_MASK << TTBR0_PA_SHIFT) + + +/* TTBR1 */ +#define TTBR1_IRGNH (TTBR1_IRGNH_MASK << TTBR1_IRGNH_SHIFT) +#define TTBR1_SH (TTBR1_SH_MASK << TTBR1_SH_SHIFT) +#define TTBR1_ORGN (TTBR1_ORGN_MASK << TTBR1_ORGN_SHIFT) +#define TTBR1_NOS (TTBR1_NOS_MASK << TTBR1_NOS_SHIFT) +#define TTBR1_IRGNL (TTBR1_IRGNL_MASK << TTBR1_IRGNL_SHIFT) +#define TTBR1_PA (TTBR1_PA_MASK << TTBR1_PA_SHIFT) + + +/* V2PSR */ +#define HIT (HIT_MASK << HIT_SHIFT) +#define INDEX (INDEX_MASK << INDEX_SHIFT) + + +/* V2Pxx */ +#define V2Pxx_INDEX (V2Pxx_INDEX_MASK << V2Pxx_INDEX_SHIFT) +#define V2Pxx_VA (V2Pxx_VA_MASK << V2Pxx_VA_SHIFT) + + +/* Global Register Masks */ +/* CBACRn */ +#define RWVMID_MASK 0x1F +#define RWE_MASK 0x01 +#define RWGE_MASK 0x01 +#define CBVMID_MASK 0x1F +#define IRPTNDX_MASK 0xFF + + +/* CR */ +#define RPUE_MASK 0x01 +#define RPUERE_MASK 0x01 +#define RPUEIE_MASK 0x01 +#define DCDEE_MASK 0x01 +#define CLIENTPD_MASK 0x01 +#define STALLD_MASK 0x01 +#define TLBLKCRWE_MASK 0x01 +#define CR_TLBIALLCFG_MASK 0x01 +#define TLBIVMIDCFG_MASK 0x01 +#define CR_HUME_MASK 0x01 + + +/* ESR */ +#define CFG_MASK 0x01 +#define BYPASS_MASK 0x01 +#define ESR_MULTI_MASK 0x01 + + +/* ESYNR0 */ +#define ESYNR0_AMID_MASK 0xFF +#define ESYNR0_APID_MASK 0x1F +#define ESYNR0_ABID_MASK 0x07 +#define ESYNR0_AVMID_MASK 0x1F +#define ESYNR0_ATID_MASK 0xFF + + +/* ESYNR1 */ +#define ESYNR1_AMEMTYPE_MASK 0x07 +#define ESYNR1_ASHARED_MASK 0x01 +#define ESYNR1_AINNERSHARED_MASK 0x01 +#define ESYNR1_APRIV_MASK 0x01 +#define ESYNR1_APROTNS_MASK 0x01 +#define ESYNR1_AINST_MASK 0x01 +#define ESYNR1_AWRITE_MASK 0x01 +#define ESYNR1_ABURST_MASK 0x01 +#define ESYNR1_ALEN_MASK 0x0F +#define ESYNR1_ASIZE_MASK 0x01 +#define ESYNR1_ALOCK_MASK 0x03 +#define ESYNR1_AOOO_MASK 0x01 +#define ESYNR1_AFULL_MASK 0x01 +#define ESYNR1_AC_MASK 0x01 +#define ESYNR1_DCD_MASK 0x01 + + +/* IDR */ +#define NM2VCBMT_MASK 0x1FF +#define HTW_MASK 0x01 +#define HUM_MASK 0x01 +#define TLBSIZE_MASK 0x0F +#define NCB_MASK 0xFF +#define NIRPT_MASK 0xFF + + +/* M2VCBRn */ +#define VMID_MASK 0x1F +#define CBNDX_MASK 0xFF +#define BYPASSD_MASK 0x01 +#define BPRCOSH_MASK 0x01 +#define BPRCISH_MASK 0x01 +#define BPRCNSH_MASK 0x01 +#define BPSHCFG_MASK 0x03 +#define NSCFG_MASK 0x03 +#define BPMTCFG_MASK 0x01 +#define BPMEMTYPE_MASK 0x07 + + +/* REV */ +#define MINOR_MASK 0x0F +#define MAJOR_MASK 0x0F + + +/* TESTBUSCR */ +#define TBE_MASK 0x01 +#define SPDMBE_MASK 0x01 +#define WGSEL_MASK 0x03 +#define TBLSEL_MASK 0x03 +#define TBHSEL_MASK 0x03 +#define SPDM0SEL_MASK 0x0F +#define SPDM1SEL_MASK 0x0F +#define SPDM2SEL_MASK 0x0F +#define SPDM3SEL_MASK 0x0F + + +/* TLBIMID */ +#define TLBIVMID_VMID_MASK 0x1F + + +/* TLBRSW */ +#define TLBRSW_INDEX_MASK 0xFF +#define TLBBFBS_MASK 0x03 + + +/* TLBTR0 */ +#define PR_MASK 0x01 +#define PW_MASK 0x01 +#define UR_MASK 0x01 +#define UW_MASK 0x01 +#define XN_MASK 0x01 +#define NSDESC_MASK 0x01 +#define ISH_MASK 0x01 +#define SH_MASK 0x01 +#define MT_MASK 0x07 +#define DPSIZR_MASK 0x07 +#define DPSIZC_MASK 0x07 + + +/* TLBTR1 */ +#define TLBTR1_VMID_MASK 0x1F +#define TLBTR1_PA_MASK 0x000FFFFF + + +/* TLBTR2 */ +#define TLBTR2_ASID_MASK 0xFF +#define TLBTR2_V_MASK 0x01 +#define TLBTR2_NSTID_MASK 0x01 +#define TLBTR2_NV_MASK 0x01 +#define TLBTR2_VA_MASK 0x000FFFFF + + +/* Global Register Shifts */ +/* CBACRn */ +#define RWVMID_SHIFT 0 +#define RWE_SHIFT 8 +#define RWGE_SHIFT 9 +#define CBVMID_SHIFT 16 +#define IRPTNDX_SHIFT 24 + + +/* CR */ +#define RPUE_SHIFT 0 +#define RPUERE_SHIFT 1 +#define RPUEIE_SHIFT 2 +#define DCDEE_SHIFT 3 +#define CLIENTPD_SHIFT 4 +#define STALLD_SHIFT 5 +#define TLBLKCRWE_SHIFT 6 +#define CR_TLBIALLCFG_SHIFT 7 +#define TLBIVMIDCFG_SHIFT 8 +#define CR_HUME_SHIFT 9 + + +/* ESR */ +#define CFG_SHIFT 0 +#define BYPASS_SHIFT 1 +#define ESR_MULTI_SHIFT 31 + + +/* ESYNR0 */ +#define ESYNR0_AMID_SHIFT 0 +#define ESYNR0_APID_SHIFT 8 +#define ESYNR0_ABID_SHIFT 13 +#define ESYNR0_AVMID_SHIFT 16 +#define ESYNR0_ATID_SHIFT 24 + + +/* ESYNR1 */ +#define ESYNR1_AMEMTYPE_SHIFT 0 +#define ESYNR1_ASHARED_SHIFT 3 +#define ESYNR1_AINNERSHARED_SHIFT 4 +#define ESYNR1_APRIV_SHIFT 5 +#define ESYNR1_APROTNS_SHIFT 6 +#define ESYNR1_AINST_SHIFT 7 +#define ESYNR1_AWRITE_SHIFT 8 +#define ESYNR1_ABURST_SHIFT 10 +#define ESYNR1_ALEN_SHIFT 12 +#define ESYNR1_ASIZE_SHIFT 16 +#define ESYNR1_ALOCK_SHIFT 20 +#define ESYNR1_AOOO_SHIFT 22 +#define ESYNR1_AFULL_SHIFT 24 +#define ESYNR1_AC_SHIFT 30 +#define ESYNR1_DCD_SHIFT 31 + + +/* IDR */ +#define NM2VCBMT_SHIFT 0 +#define HTW_SHIFT 9 +#define HUM_SHIFT 10 +#define TLBSIZE_SHIFT 12 +#define NCB_SHIFT 16 +#define NIRPT_SHIFT 24 + + +/* M2VCBRn */ +#define VMID_SHIFT 0 +#define CBNDX_SHIFT 8 +#define BYPASSD_SHIFT 16 +#define BPRCOSH_SHIFT 17 +#define BPRCISH_SHIFT 18 +#define BPRCNSH_SHIFT 19 +#define BPSHCFG_SHIFT 20 +#define NSCFG_SHIFT 22 +#define BPMTCFG_SHIFT 24 +#define BPMEMTYPE_SHIFT 25 + + +/* REV */ +#define MINOR_SHIFT 0 +#define MAJOR_SHIFT 4 + + +/* TESTBUSCR */ +#define TBE_SHIFT 0 +#define SPDMBE_SHIFT 1 +#define WGSEL_SHIFT 8 +#define TBLSEL_SHIFT 12 +#define TBHSEL_SHIFT 14 +#define SPDM0SEL_SHIFT 16 +#define SPDM1SEL_SHIFT 20 +#define SPDM2SEL_SHIFT 24 +#define SPDM3SEL_SHIFT 28 + + +/* TLBIMID */ +#define TLBIVMID_VMID_SHIFT 0 + + +/* TLBRSW */ +#define TLBRSW_INDEX_SHIFT 0 +#define TLBBFBS_SHIFT 8 + + +/* TLBTR0 */ +#define PR_SHIFT 0 +#define PW_SHIFT 1 +#define UR_SHIFT 2 +#define UW_SHIFT 3 +#define XN_SHIFT 4 +#define NSDESC_SHIFT 6 +#define ISH_SHIFT 7 +#define SH_SHIFT 8 +#define MT_SHIFT 9 +#define DPSIZR_SHIFT 16 +#define DPSIZC_SHIFT 20 + + +/* TLBTR1 */ +#define TLBTR1_VMID_SHIFT 0 +#define TLBTR1_PA_SHIFT 12 + + +/* TLBTR2 */ +#define TLBTR2_ASID_SHIFT 0 +#define TLBTR2_V_SHIFT 8 +#define TLBTR2_NSTID_SHIFT 9 +#define TLBTR2_NV_SHIFT 10 +#define TLBTR2_VA_SHIFT 12 + + +/* Context Register Masks */ +/* ACTLR */ +#define CFERE_MASK 0x01 +#define CFEIE_MASK 0x01 +#define PTSHCFG_MASK 0x03 +#define RCOSH_MASK 0x01 +#define RCISH_MASK 0x01 +#define RCNSH_MASK 0x01 +#define PRIVCFG_MASK 0x03 +#define DNA_MASK 0x01 +#define DNLV2PA_MASK 0x01 +#define TLBMCFG_MASK 0x03 +#define CFCFG_MASK 0x01 +#define TIPCF_MASK 0x01 +#define V2PCFG_MASK 0x03 +#define HUME_MASK 0x01 +#define PTMTCFG_MASK 0x01 +#define PTMEMTYPE_MASK 0x07 + + +/* BFBCR */ +#define BFBDFE_MASK 0x01 +#define BFBSFE_MASK 0x01 +#define SFVS_MASK 0x01 +#define FLVIC_MASK 0x0F +#define SLVIC_MASK 0x0F + + +/* CONTEXTIDR */ +#define CONTEXTIDR_ASID_MASK 0xFF +#define PROCID_MASK 0x00FFFFFF + + +/* FSR */ +#define TF_MASK 0x01 +#define AFF_MASK 0x01 +#define APF_MASK 0x01 +#define TLBMF_MASK 0x01 +#define HTWDEEF_MASK 0x01 +#define HTWSEEF_MASK 0x01 +#define MHF_MASK 0x01 +#define SL_MASK 0x01 +#define SS_MASK 0x01 +#define MULTI_MASK 0x01 + + +/* FSYNR0 */ +#define AMID_MASK 0xFF +#define APID_MASK 0x1F +#define ABID_MASK 0x07 +#define ATID_MASK 0xFF + + +/* FSYNR1 */ +#define AMEMTYPE_MASK 0x07 +#define ASHARED_MASK 0x01 +#define AINNERSHARED_MASK 0x01 +#define APRIV_MASK 0x01 +#define APROTNS_MASK 0x01 +#define AINST_MASK 0x01 +#define AWRITE_MASK 0x01 +#define ABURST_MASK 0x01 +#define ALEN_MASK 0x0F +#define FSYNR1_ASIZE_MASK 0x07 +#define ALOCK_MASK 0x03 +#define AFULL_MASK 0x01 + + +/* NMRR */ +#define ICPC0_MASK 0x03 +#define ICPC1_MASK 0x03 +#define ICPC2_MASK 0x03 +#define ICPC3_MASK 0x03 +#define ICPC4_MASK 0x03 +#define ICPC5_MASK 0x03 +#define ICPC6_MASK 0x03 +#define ICPC7_MASK 0x03 +#define OCPC0_MASK 0x03 +#define OCPC1_MASK 0x03 +#define OCPC2_MASK 0x03 +#define OCPC3_MASK 0x03 +#define OCPC4_MASK 0x03 +#define OCPC5_MASK 0x03 +#define OCPC6_MASK 0x03 +#define OCPC7_MASK 0x03 + + +/* PAR */ +#define FAULT_MASK 0x01 +/* If a fault is present, these are the +same as the fault fields in the FAR */ +#define FAULT_TF_MASK 0x01 +#define FAULT_AFF_MASK 0x01 +#define FAULT_APF_MASK 0x01 +#define FAULT_TLBMF_MASK 0x01 +#define FAULT_HTWDEEF_MASK 0x01 +#define FAULT_HTWSEEF_MASK 0x01 +#define FAULT_MHF_MASK 0x01 +#define FAULT_SL_MASK 0x01 +#define FAULT_SS_MASK 0x01 + +/* If NO fault is present, the following + * fields are in effect + * (FAULT remains as before) */ +#define PAR_NOFAULT_SS_MASK 0x01 +#define PAR_NOFAULT_MT_MASK 0x07 +#define PAR_NOFAULT_SH_MASK 0x01 +#define PAR_NOFAULT_NS_MASK 0x01 +#define PAR_NOFAULT_NOS_MASK 0x01 +#define PAR_NPFAULT_PA_MASK 0x000FFFFF + + +/* PRRR */ +#define MTC0_MASK 0x03 +#define MTC1_MASK 0x03 +#define MTC2_MASK 0x03 +#define MTC3_MASK 0x03 +#define MTC4_MASK 0x03 +#define MTC5_MASK 0x03 +#define MTC6_MASK 0x03 +#define MTC7_MASK 0x03 +#define SHDSH0_MASK 0x01 +#define SHDSH1_MASK 0x01 +#define SHNMSH0_MASK 0x01 +#define SHNMSH1_MASK 0x01 +#define NOS0_MASK 0x01 +#define NOS1_MASK 0x01 +#define NOS2_MASK 0x01 +#define NOS3_MASK 0x01 +#define NOS4_MASK 0x01 +#define NOS5_MASK 0x01 +#define NOS6_MASK 0x01 +#define NOS7_MASK 0x01 + + +/* RESUME */ +#define TNR_MASK 0x01 + + +/* SCTLR */ +#define M_MASK 0x01 +#define TRE_MASK 0x01 +#define AFE_MASK 0x01 +#define HAF_MASK 0x01 +#define BE_MASK 0x01 +#define AFFD_MASK 0x01 + + +/* TLBIASID */ +#define TLBIASID_ASID_MASK 0xFF + + +/* TLBIVA */ +#define TLBIVA_ASID_MASK 0xFF +#define TLBIVA_VA_MASK 0x000FFFFF + + +/* TLBIVAA */ +#define TLBIVAA_VA_MASK 0x000FFFFF + + +/* TLBLCKR */ +#define LKE_MASK 0x01 +#define TLBLCKR_TLBIALLCFG_MASK 0x01 +#define TLBIASIDCFG_MASK 0x01 +#define TLBIVAACFG_MASK 0x01 +#define FLOOR_MASK 0xFF +#define VICTIM_MASK 0xFF + + +/* TTBCR */ +#define N_MASK 0x07 +#define PD0_MASK 0x01 +#define PD1_MASK 0x01 + + +/* TTBR0 */ +#define TTBR0_IRGNH_MASK 0x01 +#define TTBR0_SH_MASK 0x01 +#define TTBR0_ORGN_MASK 0x03 +#define TTBR0_NOS_MASK 0x01 +#define TTBR0_IRGNL_MASK 0x01 +#define TTBR0_PA_MASK 0x0003FFFF + + +/* TTBR1 */ +#define TTBR1_IRGNH_MASK 0x01 +#define TTBR1_SH_MASK 0x01 +#define TTBR1_ORGN_MASK 0x03 +#define TTBR1_NOS_MASK 0x01 +#define TTBR1_IRGNL_MASK 0x01 +#define TTBR1_PA_MASK 0x0003FFFF + + +/* V2PSR */ +#define HIT_MASK 0x01 +#define INDEX_MASK 0xFF + + +/* V2Pxx */ +#define V2Pxx_INDEX_MASK 0xFF +#define V2Pxx_VA_MASK 0x000FFFFF + + +/* Context Register Shifts */ +/* ACTLR */ +#define CFERE_SHIFT 0 +#define CFEIE_SHIFT 1 +#define PTSHCFG_SHIFT 2 +#define RCOSH_SHIFT 4 +#define RCISH_SHIFT 5 +#define RCNSH_SHIFT 6 +#define PRIVCFG_SHIFT 8 +#define DNA_SHIFT 10 +#define DNLV2PA_SHIFT 11 +#define TLBMCFG_SHIFT 12 +#define CFCFG_SHIFT 14 +#define TIPCF_SHIFT 15 +#define V2PCFG_SHIFT 16 +#define HUME_SHIFT 18 +#define PTMTCFG_SHIFT 20 +#define PTMEMTYPE_SHIFT 21 + + +/* BFBCR */ +#define BFBDFE_SHIFT 0 +#define BFBSFE_SHIFT 1 +#define SFVS_SHIFT 2 +#define FLVIC_SHIFT 4 +#define SLVIC_SHIFT 8 + + +/* CONTEXTIDR */ +#define CONTEXTIDR_ASID_SHIFT 0 +#define PROCID_SHIFT 8 + + +/* FSR */ +#define TF_SHIFT 1 +#define AFF_SHIFT 2 +#define APF_SHIFT 3 +#define TLBMF_SHIFT 4 +#define HTWDEEF_SHIFT 5 +#define HTWSEEF_SHIFT 6 +#define MHF_SHIFT 7 +#define SL_SHIFT 16 +#define SS_SHIFT 30 +#define MULTI_SHIFT 31 + + +/* FSYNR0 */ +#define AMID_SHIFT 0 +#define APID_SHIFT 8 +#define ABID_SHIFT 13 +#define ATID_SHIFT 24 + + +/* FSYNR1 */ +#define AMEMTYPE_SHIFT 0 +#define ASHARED_SHIFT 3 +#define AINNERSHARED_SHIFT 4 +#define APRIV_SHIFT 5 +#define APROTNS_SHIFT 6 +#define AINST_SHIFT 7 +#define AWRITE_SHIFT 8 +#define ABURST_SHIFT 10 +#define ALEN_SHIFT 12 +#define FSYNR1_ASIZE_SHIFT 16 +#define ALOCK_SHIFT 20 +#define AFULL_SHIFT 24 + + +/* NMRR */ +#define ICPC0_SHIFT 0 +#define ICPC1_SHIFT 2 +#define ICPC2_SHIFT 4 +#define ICPC3_SHIFT 6 +#define ICPC4_SHIFT 8 +#define ICPC5_SHIFT 10 +#define ICPC6_SHIFT 12 +#define ICPC7_SHIFT 14 +#define OCPC0_SHIFT 16 +#define OCPC1_SHIFT 18 +#define OCPC2_SHIFT 20 +#define OCPC3_SHIFT 22 +#define OCPC4_SHIFT 24 +#define OCPC5_SHIFT 26 +#define OCPC6_SHIFT 28 +#define OCPC7_SHIFT 30 + + +/* PAR */ +#define FAULT_SHIFT 0 +/* If a fault is present, these are the +same as the fault fields in the FAR */ +#define FAULT_TF_SHIFT 1 +#define FAULT_AFF_SHIFT 2 +#define FAULT_APF_SHIFT 3 +#define FAULT_TLBMF_SHIFT 4 +#define FAULT_HTWDEEF_SHIFT 5 +#define FAULT_HTWSEEF_SHIFT 6 +#define FAULT_MHF_SHIFT 7 +#define FAULT_SL_SHIFT 16 +#define FAULT_SS_SHIFT 30 + +/* If NO fault is present, the following + * fields are in effect + * (FAULT remains as before) */ +#define PAR_NOFAULT_SS_SHIFT 1 +#define PAR_NOFAULT_MT_SHIFT 4 +#define PAR_NOFAULT_SH_SHIFT 7 +#define PAR_NOFAULT_NS_SHIFT 9 +#define PAR_NOFAULT_NOS_SHIFT 10 +#define PAR_NPFAULT_PA_SHIFT 12 + + +/* PRRR */ +#define MTC0_SHIFT 0 +#define MTC1_SHIFT 2 +#define MTC2_SHIFT 4 +#define MTC3_SHIFT 6 +#define MTC4_SHIFT 8 +#define MTC5_SHIFT 10 +#define MTC6_SHIFT 12 +#define MTC7_SHIFT 14 +#define SHDSH0_SHIFT 16 +#define SHDSH1_SHIFT 17 +#define SHNMSH0_SHIFT 18 +#define SHNMSH1_SHIFT 19 +#define NOS0_SHIFT 24 +#define NOS1_SHIFT 25 +#define NOS2_SHIFT 26 +#define NOS3_SHIFT 27 +#define NOS4_SHIFT 28 +#define NOS5_SHIFT 29 +#define NOS6_SHIFT 30 +#define NOS7_SHIFT 31 + + +/* RESUME */ +#define TNR_SHIFT 0 + + +/* SCTLR */ +#define M_SHIFT 0 +#define TRE_SHIFT 1 +#define AFE_SHIFT 2 +#define HAF_SHIFT 3 +#define BE_SHIFT 4 +#define AFFD_SHIFT 5 + + +/* TLBIASID */ +#define TLBIASID_ASID_SHIFT 0 + + +/* TLBIVA */ +#define TLBIVA_ASID_SHIFT 0 +#define TLBIVA_VA_SHIFT 12 + + +/* TLBIVAA */ +#define TLBIVAA_VA_SHIFT 12 + + +/* TLBLCKR */ +#define LKE_SHIFT 0 +#define TLBLCKR_TLBIALLCFG_SHIFT 1 +#define TLBIASIDCFG_SHIFT 2 +#define TLBIVAACFG_SHIFT 3 +#define FLOOR_SHIFT 8 +#define VICTIM_SHIFT 8 + + +/* TTBCR */ +#define N_SHIFT 3 +#define PD0_SHIFT 4 +#define PD1_SHIFT 5 + + +/* TTBR0 */ +#define TTBR0_IRGNH_SHIFT 0 +#define TTBR0_SH_SHIFT 1 +#define TTBR0_ORGN_SHIFT 3 +#define TTBR0_NOS_SHIFT 5 +#define TTBR0_IRGNL_SHIFT 6 +#define TTBR0_PA_SHIFT 14 + + +/* TTBR1 */ +#define TTBR1_IRGNH_SHIFT 0 +#define TTBR1_SH_SHIFT 1 +#define TTBR1_ORGN_SHIFT 3 +#define TTBR1_NOS_SHIFT 5 +#define TTBR1_IRGNL_SHIFT 6 +#define TTBR1_PA_SHIFT 14 + + +/* V2PSR */ +#define HIT_SHIFT 0 +#define INDEX_SHIFT 8 + + +/* V2Pxx */ +#define V2Pxx_INDEX_SHIFT 0 +#define V2Pxx_VA_SHIFT 12 + +#endif diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c new file mode 100644 index 000000000..43429ab62 --- /dev/null +++ b/drivers/iommu/of_iommu.c @@ -0,0 +1,186 @@ +/* + * OF helpers for IOMMU + * + * Copyright (c) 2012, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <linux/export.h> +#include <linux/iommu.h> +#include <linux/limits.h> +#include <linux/of.h> +#include <linux/of_iommu.h> +#include <linux/slab.h> + +static const struct of_device_id __iommu_of_table_sentinel + __used __section(__iommu_of_table_end); + +/** + * of_get_dma_window - Parse *dma-window property and returns 0 if found. + * + * @dn: device node + * @prefix: prefix for property name if any + * @index: index to start to parse + * @busno: Returns busno if supported. Otherwise pass NULL + * @addr: Returns address that DMA starts + * @size: Returns the range that DMA can handle + * + * This supports different formats flexibly. "prefix" can be + * configured if any. "busno" and "index" are optionally + * specified. Set 0(or NULL) if not used. + */ +int of_get_dma_window(struct device_node *dn, const char *prefix, int index, + unsigned long *busno, dma_addr_t *addr, size_t *size) +{ + const __be32 *dma_window, *end; + int bytes, cur_index = 0; + char propname[NAME_MAX], addrname[NAME_MAX], sizename[NAME_MAX]; + + if (!dn || !addr || !size) + return -EINVAL; + + if (!prefix) + prefix = ""; + + snprintf(propname, sizeof(propname), "%sdma-window", prefix); + snprintf(addrname, sizeof(addrname), "%s#dma-address-cells", prefix); + snprintf(sizename, sizeof(sizename), "%s#dma-size-cells", prefix); + + dma_window = of_get_property(dn, propname, &bytes); + if (!dma_window) + return -ENODEV; + end = dma_window + bytes / sizeof(*dma_window); + + while (dma_window < end) { + u32 cells; + const void *prop; + + /* busno is one cell if supported */ + if (busno) + *busno = be32_to_cpup(dma_window++); + + prop = of_get_property(dn, addrname, NULL); + if (!prop) + prop = of_get_property(dn, "#address-cells", NULL); + + cells = prop ? be32_to_cpup(prop) : of_n_addr_cells(dn); + if (!cells) + return -EINVAL; + *addr = of_read_number(dma_window, cells); + dma_window += cells; + + prop = of_get_property(dn, sizename, NULL); + cells = prop ? be32_to_cpup(prop) : of_n_size_cells(dn); + if (!cells) + return -EINVAL; + *size = of_read_number(dma_window, cells); + dma_window += cells; + + if (cur_index++ == index) + break; + } + return 0; +} +EXPORT_SYMBOL_GPL(of_get_dma_window); + +struct of_iommu_node { + struct list_head list; + struct device_node *np; + struct iommu_ops *ops; +}; +static LIST_HEAD(of_iommu_list); +static DEFINE_SPINLOCK(of_iommu_lock); + +void of_iommu_set_ops(struct device_node *np, struct iommu_ops *ops) +{ + struct of_iommu_node *iommu = kzalloc(sizeof(*iommu), GFP_KERNEL); + + if (WARN_ON(!iommu)) + return; + + INIT_LIST_HEAD(&iommu->list); + iommu->np = np; + iommu->ops = ops; + spin_lock(&of_iommu_lock); + list_add_tail(&iommu->list, &of_iommu_list); + spin_unlock(&of_iommu_lock); +} + +struct iommu_ops *of_iommu_get_ops(struct device_node *np) +{ + struct of_iommu_node *node; + struct iommu_ops *ops = NULL; + + spin_lock(&of_iommu_lock); + list_for_each_entry(node, &of_iommu_list, list) + if (node->np == np) { + ops = node->ops; + break; + } + spin_unlock(&of_iommu_lock); + return ops; +} + +struct iommu_ops *of_iommu_configure(struct device *dev, + struct device_node *master_np) +{ + struct of_phandle_args iommu_spec; + struct device_node *np; + struct iommu_ops *ops = NULL; + int idx = 0; + + if (dev_is_pci(dev)) { + dev_err(dev, "IOMMU is currently not supported for PCI\n"); + return NULL; + } + + /* + * We don't currently walk up the tree looking for a parent IOMMU. + * See the `Notes:' section of + * Documentation/devicetree/bindings/iommu/iommu.txt + */ + while (!of_parse_phandle_with_args(master_np, "iommus", + "#iommu-cells", idx, + &iommu_spec)) { + np = iommu_spec.np; + ops = of_iommu_get_ops(np); + + if (!ops || !ops->of_xlate || ops->of_xlate(dev, &iommu_spec)) + goto err_put_node; + + of_node_put(np); + idx++; + } + + return ops; + +err_put_node: + of_node_put(np); + return NULL; +} + +void __init of_iommu_init(void) +{ + struct device_node *np; + const struct of_device_id *match, *matches = &__iommu_of_table; + + for_each_matching_node_and_match(np, matches, &match) { + const of_iommu_init_fn init_fn = match->data; + + if (init_fn(np)) + pr_err("Failed to initialise IOMMU %s\n", + of_node_full_name(np)); + } +} diff --git a/drivers/iommu/omap-iommu-debug.c b/drivers/iommu/omap-iommu-debug.c new file mode 100644 index 000000000..f3d20a203 --- /dev/null +++ b/drivers/iommu/omap-iommu-debug.c @@ -0,0 +1,218 @@ +/* + * omap iommu: debugfs interface + * + * Copyright (C) 2008-2009 Nokia Corporation + * + * Written by Hiroshi DOYU <Hiroshi.DOYU@nokia.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/err.h> +#include <linux/io.h> +#include <linux/slab.h> +#include <linux/uaccess.h> +#include <linux/debugfs.h> +#include <linux/platform_data/iommu-omap.h> + +#include "omap-iopgtable.h" +#include "omap-iommu.h" + +static DEFINE_MUTEX(iommu_debug_lock); + +static struct dentry *iommu_debug_root; + +static inline bool is_omap_iommu_detached(struct omap_iommu *obj) +{ + return !obj->domain; +} + +static ssize_t debug_read_regs(struct file *file, char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct omap_iommu *obj = file->private_data; + char *p, *buf; + ssize_t bytes; + + if (is_omap_iommu_detached(obj)) + return -EPERM; + + buf = kmalloc(count, GFP_KERNEL); + if (!buf) + return -ENOMEM; + p = buf; + + mutex_lock(&iommu_debug_lock); + + bytes = omap_iommu_dump_ctx(obj, p, count); + bytes = simple_read_from_buffer(userbuf, count, ppos, buf, bytes); + + mutex_unlock(&iommu_debug_lock); + kfree(buf); + + return bytes; +} + +static ssize_t debug_read_tlb(struct file *file, char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct omap_iommu *obj = file->private_data; + char *p, *buf; + ssize_t bytes, rest; + + if (is_omap_iommu_detached(obj)) + return -EPERM; + + buf = kmalloc(count, GFP_KERNEL); + if (!buf) + return -ENOMEM; + p = buf; + + mutex_lock(&iommu_debug_lock); + + p += sprintf(p, "%8s %8s\n", "cam:", "ram:"); + p += sprintf(p, "-----------------------------------------\n"); + rest = count - (p - buf); + p += omap_dump_tlb_entries(obj, p, rest); + + bytes = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf); + + mutex_unlock(&iommu_debug_lock); + kfree(buf); + + return bytes; +} + +static void dump_ioptable(struct seq_file *s) +{ + int i, j; + u32 da; + u32 *iopgd, *iopte; + struct omap_iommu *obj = s->private; + + spin_lock(&obj->page_table_lock); + + iopgd = iopgd_offset(obj, 0); + for (i = 0; i < PTRS_PER_IOPGD; i++, iopgd++) { + if (!*iopgd) + continue; + + if (!(*iopgd & IOPGD_TABLE)) { + da = i << IOPGD_SHIFT; + seq_printf(s, "1: 0x%08x 0x%08x\n", da, *iopgd); + continue; + } + + iopte = iopte_offset(iopgd, 0); + for (j = 0; j < PTRS_PER_IOPTE; j++, iopte++) { + if (!*iopte) + continue; + + da = (i << IOPGD_SHIFT) + (j << IOPTE_SHIFT); + seq_printf(s, "2: 0x%08x 0x%08x\n", da, *iopte); + } + } + + spin_unlock(&obj->page_table_lock); +} + +static int debug_read_pagetable(struct seq_file *s, void *data) +{ + struct omap_iommu *obj = s->private; + + if (is_omap_iommu_detached(obj)) + return -EPERM; + + mutex_lock(&iommu_debug_lock); + + seq_printf(s, "L: %8s %8s\n", "da:", "pte:"); + seq_puts(s, "--------------------------\n"); + dump_ioptable(s); + + mutex_unlock(&iommu_debug_lock); + + return 0; +} + +#define DEBUG_SEQ_FOPS_RO(name) \ + static int debug_open_##name(struct inode *inode, struct file *file) \ + { \ + return single_open(file, debug_read_##name, inode->i_private); \ + } \ + \ + static const struct file_operations debug_##name##_fops = { \ + .open = debug_open_##name, \ + .read = seq_read, \ + .llseek = seq_lseek, \ + .release = single_release, \ + } + +#define DEBUG_FOPS_RO(name) \ + static const struct file_operations debug_##name##_fops = { \ + .open = simple_open, \ + .read = debug_read_##name, \ + .llseek = generic_file_llseek, \ + }; + +DEBUG_FOPS_RO(regs); +DEBUG_FOPS_RO(tlb); +DEBUG_SEQ_FOPS_RO(pagetable); + +#define __DEBUG_ADD_FILE(attr, mode) \ + { \ + struct dentry *dent; \ + dent = debugfs_create_file(#attr, mode, obj->debug_dir, \ + obj, &debug_##attr##_fops); \ + if (!dent) \ + goto err; \ + } + +#define DEBUG_ADD_FILE_RO(name) __DEBUG_ADD_FILE(name, 0400) + +void omap_iommu_debugfs_add(struct omap_iommu *obj) +{ + struct dentry *d; + + if (!iommu_debug_root) + return; + + obj->debug_dir = debugfs_create_dir(obj->name, iommu_debug_root); + if (!obj->debug_dir) + return; + + d = debugfs_create_u8("nr_tlb_entries", 0400, obj->debug_dir, + (u8 *)&obj->nr_tlb_entries); + if (!d) + return; + + DEBUG_ADD_FILE_RO(regs); + DEBUG_ADD_FILE_RO(tlb); + DEBUG_ADD_FILE_RO(pagetable); + + return; + +err: + debugfs_remove_recursive(obj->debug_dir); +} + +void omap_iommu_debugfs_remove(struct omap_iommu *obj) +{ + if (!obj->debug_dir) + return; + + debugfs_remove_recursive(obj->debug_dir); +} + +void __init omap_iommu_debugfs_init(void) +{ + iommu_debug_root = debugfs_create_dir("omap_iommu", NULL); + if (!iommu_debug_root) + pr_err("can't create debugfs dir\n"); +} + +void __exit omap_iommu_debugfs_exit(void) +{ + debugfs_remove(iommu_debug_root); +} diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c new file mode 100644 index 000000000..a22c33d6a --- /dev/null +++ b/drivers/iommu/omap-iommu.c @@ -0,0 +1,1424 @@ +/* + * omap iommu: tlb and pagetable primitives + * + * Copyright (C) 2008-2010 Nokia Corporation + * + * Written by Hiroshi DOYU <Hiroshi.DOYU@nokia.com>, + * Paul Mundt and Toshihiro Kobayashi + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/err.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/interrupt.h> +#include <linux/ioport.h> +#include <linux/platform_device.h> +#include <linux/iommu.h> +#include <linux/omap-iommu.h> +#include <linux/mutex.h> +#include <linux/spinlock.h> +#include <linux/io.h> +#include <linux/pm_runtime.h> +#include <linux/of.h> +#include <linux/of_iommu.h> +#include <linux/of_irq.h> +#include <linux/of_platform.h> + +#include <asm/cacheflush.h> + +#include <linux/platform_data/iommu-omap.h> + +#include "omap-iopgtable.h" +#include "omap-iommu.h" + +#define to_iommu(dev) \ + ((struct omap_iommu *)platform_get_drvdata(to_platform_device(dev))) + +#define for_each_iotlb_cr(obj, n, __i, cr) \ + for (__i = 0; \ + (__i < (n)) && (cr = __iotlb_read_cr((obj), __i), true); \ + __i++) + +/* bitmap of the page sizes currently supported */ +#define OMAP_IOMMU_PGSIZES (SZ_4K | SZ_64K | SZ_1M | SZ_16M) + +/** + * struct omap_iommu_domain - omap iommu domain + * @pgtable: the page table + * @iommu_dev: an omap iommu device attached to this domain. only a single + * iommu device can be attached for now. + * @dev: Device using this domain. + * @lock: domain lock, should be taken when attaching/detaching + */ +struct omap_iommu_domain { + u32 *pgtable; + struct omap_iommu *iommu_dev; + struct device *dev; + spinlock_t lock; + struct iommu_domain domain; +}; + +#define MMU_LOCK_BASE_SHIFT 10 +#define MMU_LOCK_BASE_MASK (0x1f << MMU_LOCK_BASE_SHIFT) +#define MMU_LOCK_BASE(x) \ + ((x & MMU_LOCK_BASE_MASK) >> MMU_LOCK_BASE_SHIFT) + +#define MMU_LOCK_VICT_SHIFT 4 +#define MMU_LOCK_VICT_MASK (0x1f << MMU_LOCK_VICT_SHIFT) +#define MMU_LOCK_VICT(x) \ + ((x & MMU_LOCK_VICT_MASK) >> MMU_LOCK_VICT_SHIFT) + +struct iotlb_lock { + short base; + short vict; +}; + +static struct platform_driver omap_iommu_driver; +static struct kmem_cache *iopte_cachep; + +/** + * to_omap_domain - Get struct omap_iommu_domain from generic iommu_domain + * @dom: generic iommu domain handle + **/ +static struct omap_iommu_domain *to_omap_domain(struct iommu_domain *dom) +{ + return container_of(dom, struct omap_iommu_domain, domain); +} + +/** + * omap_iommu_save_ctx - Save registers for pm off-mode support + * @dev: client device + **/ +void omap_iommu_save_ctx(struct device *dev) +{ + struct omap_iommu *obj = dev_to_omap_iommu(dev); + u32 *p = obj->ctx; + int i; + + for (i = 0; i < (MMU_REG_SIZE / sizeof(u32)); i++) { + p[i] = iommu_read_reg(obj, i * sizeof(u32)); + dev_dbg(obj->dev, "%s\t[%02d] %08x\n", __func__, i, p[i]); + } +} +EXPORT_SYMBOL_GPL(omap_iommu_save_ctx); + +/** + * omap_iommu_restore_ctx - Restore registers for pm off-mode support + * @dev: client device + **/ +void omap_iommu_restore_ctx(struct device *dev) +{ + struct omap_iommu *obj = dev_to_omap_iommu(dev); + u32 *p = obj->ctx; + int i; + + for (i = 0; i < (MMU_REG_SIZE / sizeof(u32)); i++) { + iommu_write_reg(obj, p[i], i * sizeof(u32)); + dev_dbg(obj->dev, "%s\t[%02d] %08x\n", __func__, i, p[i]); + } +} +EXPORT_SYMBOL_GPL(omap_iommu_restore_ctx); + +static void __iommu_set_twl(struct omap_iommu *obj, bool on) +{ + u32 l = iommu_read_reg(obj, MMU_CNTL); + + if (on) + iommu_write_reg(obj, MMU_IRQ_TWL_MASK, MMU_IRQENABLE); + else + iommu_write_reg(obj, MMU_IRQ_TLB_MISS_MASK, MMU_IRQENABLE); + + l &= ~MMU_CNTL_MASK; + if (on) + l |= (MMU_CNTL_MMU_EN | MMU_CNTL_TWL_EN); + else + l |= (MMU_CNTL_MMU_EN); + + iommu_write_reg(obj, l, MMU_CNTL); +} + +static int omap2_iommu_enable(struct omap_iommu *obj) +{ + u32 l, pa; + + if (!obj->iopgd || !IS_ALIGNED((u32)obj->iopgd, SZ_16K)) + return -EINVAL; + + pa = virt_to_phys(obj->iopgd); + if (!IS_ALIGNED(pa, SZ_16K)) + return -EINVAL; + + l = iommu_read_reg(obj, MMU_REVISION); + dev_info(obj->dev, "%s: version %d.%d\n", obj->name, + (l >> 4) & 0xf, l & 0xf); + + iommu_write_reg(obj, pa, MMU_TTB); + + if (obj->has_bus_err_back) + iommu_write_reg(obj, MMU_GP_REG_BUS_ERR_BACK_EN, MMU_GP_REG); + + __iommu_set_twl(obj, true); + + return 0; +} + +static void omap2_iommu_disable(struct omap_iommu *obj) +{ + u32 l = iommu_read_reg(obj, MMU_CNTL); + + l &= ~MMU_CNTL_MASK; + iommu_write_reg(obj, l, MMU_CNTL); + + dev_dbg(obj->dev, "%s is shutting down\n", obj->name); +} + +static int iommu_enable(struct omap_iommu *obj) +{ + int err; + struct platform_device *pdev = to_platform_device(obj->dev); + struct iommu_platform_data *pdata = dev_get_platdata(&pdev->dev); + + if (pdata && pdata->deassert_reset) { + err = pdata->deassert_reset(pdev, pdata->reset_name); + if (err) { + dev_err(obj->dev, "deassert_reset failed: %d\n", err); + return err; + } + } + + pm_runtime_get_sync(obj->dev); + + err = omap2_iommu_enable(obj); + + return err; +} + +static void iommu_disable(struct omap_iommu *obj) +{ + struct platform_device *pdev = to_platform_device(obj->dev); + struct iommu_platform_data *pdata = dev_get_platdata(&pdev->dev); + + omap2_iommu_disable(obj); + + pm_runtime_put_sync(obj->dev); + + if (pdata && pdata->assert_reset) + pdata->assert_reset(pdev, pdata->reset_name); +} + +/* + * TLB operations + */ +static inline int iotlb_cr_valid(struct cr_regs *cr) +{ + if (!cr) + return -EINVAL; + + return cr->cam & MMU_CAM_V; +} + +static u32 iotlb_cr_to_virt(struct cr_regs *cr) +{ + u32 page_size = cr->cam & MMU_CAM_PGSZ_MASK; + u32 mask = get_cam_va_mask(cr->cam & page_size); + + return cr->cam & mask; +} + +static u32 get_iopte_attr(struct iotlb_entry *e) +{ + u32 attr; + + attr = e->mixed << 5; + attr |= e->endian; + attr |= e->elsz >> 3; + attr <<= (((e->pgsz == MMU_CAM_PGSZ_4K) || + (e->pgsz == MMU_CAM_PGSZ_64K)) ? 0 : 6); + return attr; +} + +static u32 iommu_report_fault(struct omap_iommu *obj, u32 *da) +{ + u32 status, fault_addr; + + status = iommu_read_reg(obj, MMU_IRQSTATUS); + status &= MMU_IRQ_MASK; + if (!status) { + *da = 0; + return 0; + } + + fault_addr = iommu_read_reg(obj, MMU_FAULT_AD); + *da = fault_addr; + + iommu_write_reg(obj, status, MMU_IRQSTATUS); + + return status; +} + +static void iotlb_lock_get(struct omap_iommu *obj, struct iotlb_lock *l) +{ + u32 val; + + val = iommu_read_reg(obj, MMU_LOCK); + + l->base = MMU_LOCK_BASE(val); + l->vict = MMU_LOCK_VICT(val); + +} + +static void iotlb_lock_set(struct omap_iommu *obj, struct iotlb_lock *l) +{ + u32 val; + + val = (l->base << MMU_LOCK_BASE_SHIFT); + val |= (l->vict << MMU_LOCK_VICT_SHIFT); + + iommu_write_reg(obj, val, MMU_LOCK); +} + +static void iotlb_read_cr(struct omap_iommu *obj, struct cr_regs *cr) +{ + cr->cam = iommu_read_reg(obj, MMU_READ_CAM); + cr->ram = iommu_read_reg(obj, MMU_READ_RAM); +} + +static void iotlb_load_cr(struct omap_iommu *obj, struct cr_regs *cr) +{ + iommu_write_reg(obj, cr->cam | MMU_CAM_V, MMU_CAM); + iommu_write_reg(obj, cr->ram, MMU_RAM); + + iommu_write_reg(obj, 1, MMU_FLUSH_ENTRY); + iommu_write_reg(obj, 1, MMU_LD_TLB); +} + +/* only used in iotlb iteration for-loop */ +static struct cr_regs __iotlb_read_cr(struct omap_iommu *obj, int n) +{ + struct cr_regs cr; + struct iotlb_lock l; + + iotlb_lock_get(obj, &l); + l.vict = n; + iotlb_lock_set(obj, &l); + iotlb_read_cr(obj, &cr); + + return cr; +} + +#ifdef PREFETCH_IOTLB +static struct cr_regs *iotlb_alloc_cr(struct omap_iommu *obj, + struct iotlb_entry *e) +{ + struct cr_regs *cr; + + if (!e) + return NULL; + + if (e->da & ~(get_cam_va_mask(e->pgsz))) { + dev_err(obj->dev, "%s:\twrong alignment: %08x\n", __func__, + e->da); + return ERR_PTR(-EINVAL); + } + + cr = kmalloc(sizeof(*cr), GFP_KERNEL); + if (!cr) + return ERR_PTR(-ENOMEM); + + cr->cam = (e->da & MMU_CAM_VATAG_MASK) | e->prsvd | e->pgsz | e->valid; + cr->ram = e->pa | e->endian | e->elsz | e->mixed; + + return cr; +} + +/** + * load_iotlb_entry - Set an iommu tlb entry + * @obj: target iommu + * @e: an iommu tlb entry info + **/ +static int load_iotlb_entry(struct omap_iommu *obj, struct iotlb_entry *e) +{ + int err = 0; + struct iotlb_lock l; + struct cr_regs *cr; + + if (!obj || !obj->nr_tlb_entries || !e) + return -EINVAL; + + pm_runtime_get_sync(obj->dev); + + iotlb_lock_get(obj, &l); + if (l.base == obj->nr_tlb_entries) { + dev_warn(obj->dev, "%s: preserve entries full\n", __func__); + err = -EBUSY; + goto out; + } + if (!e->prsvd) { + int i; + struct cr_regs tmp; + + for_each_iotlb_cr(obj, obj->nr_tlb_entries, i, tmp) + if (!iotlb_cr_valid(&tmp)) + break; + + if (i == obj->nr_tlb_entries) { + dev_dbg(obj->dev, "%s: full: no entry\n", __func__); + err = -EBUSY; + goto out; + } + + iotlb_lock_get(obj, &l); + } else { + l.vict = l.base; + iotlb_lock_set(obj, &l); + } + + cr = iotlb_alloc_cr(obj, e); + if (IS_ERR(cr)) { + pm_runtime_put_sync(obj->dev); + return PTR_ERR(cr); + } + + iotlb_load_cr(obj, cr); + kfree(cr); + + if (e->prsvd) + l.base++; + /* increment victim for next tlb load */ + if (++l.vict == obj->nr_tlb_entries) + l.vict = l.base; + iotlb_lock_set(obj, &l); +out: + pm_runtime_put_sync(obj->dev); + return err; +} + +#else /* !PREFETCH_IOTLB */ + +static int load_iotlb_entry(struct omap_iommu *obj, struct iotlb_entry *e) +{ + return 0; +} + +#endif /* !PREFETCH_IOTLB */ + +static int prefetch_iotlb_entry(struct omap_iommu *obj, struct iotlb_entry *e) +{ + return load_iotlb_entry(obj, e); +} + +/** + * flush_iotlb_page - Clear an iommu tlb entry + * @obj: target iommu + * @da: iommu device virtual address + * + * Clear an iommu tlb entry which includes 'da' address. + **/ +static void flush_iotlb_page(struct omap_iommu *obj, u32 da) +{ + int i; + struct cr_regs cr; + + pm_runtime_get_sync(obj->dev); + + for_each_iotlb_cr(obj, obj->nr_tlb_entries, i, cr) { + u32 start; + size_t bytes; + + if (!iotlb_cr_valid(&cr)) + continue; + + start = iotlb_cr_to_virt(&cr); + bytes = iopgsz_to_bytes(cr.cam & 3); + + if ((start <= da) && (da < start + bytes)) { + dev_dbg(obj->dev, "%s: %08x<=%08x(%x)\n", + __func__, start, da, bytes); + iotlb_load_cr(obj, &cr); + iommu_write_reg(obj, 1, MMU_FLUSH_ENTRY); + break; + } + } + pm_runtime_put_sync(obj->dev); + + if (i == obj->nr_tlb_entries) + dev_dbg(obj->dev, "%s: no page for %08x\n", __func__, da); +} + +/** + * flush_iotlb_all - Clear all iommu tlb entries + * @obj: target iommu + **/ +static void flush_iotlb_all(struct omap_iommu *obj) +{ + struct iotlb_lock l; + + pm_runtime_get_sync(obj->dev); + + l.base = 0; + l.vict = 0; + iotlb_lock_set(obj, &l); + + iommu_write_reg(obj, 1, MMU_GFLUSH); + + pm_runtime_put_sync(obj->dev); +} + +#ifdef CONFIG_OMAP_IOMMU_DEBUG + +#define pr_reg(name) \ + do { \ + ssize_t bytes; \ + const char *str = "%20s: %08x\n"; \ + const int maxcol = 32; \ + bytes = snprintf(p, maxcol, str, __stringify(name), \ + iommu_read_reg(obj, MMU_##name)); \ + p += bytes; \ + len -= bytes; \ + if (len < maxcol) \ + goto out; \ + } while (0) + +static ssize_t +omap2_iommu_dump_ctx(struct omap_iommu *obj, char *buf, ssize_t len) +{ + char *p = buf; + + pr_reg(REVISION); + pr_reg(IRQSTATUS); + pr_reg(IRQENABLE); + pr_reg(WALKING_ST); + pr_reg(CNTL); + pr_reg(FAULT_AD); + pr_reg(TTB); + pr_reg(LOCK); + pr_reg(LD_TLB); + pr_reg(CAM); + pr_reg(RAM); + pr_reg(GFLUSH); + pr_reg(FLUSH_ENTRY); + pr_reg(READ_CAM); + pr_reg(READ_RAM); + pr_reg(EMU_FAULT_AD); +out: + return p - buf; +} + +ssize_t omap_iommu_dump_ctx(struct omap_iommu *obj, char *buf, ssize_t bytes) +{ + if (!obj || !buf) + return -EINVAL; + + pm_runtime_get_sync(obj->dev); + + bytes = omap2_iommu_dump_ctx(obj, buf, bytes); + + pm_runtime_put_sync(obj->dev); + + return bytes; +} + +static int +__dump_tlb_entries(struct omap_iommu *obj, struct cr_regs *crs, int num) +{ + int i; + struct iotlb_lock saved; + struct cr_regs tmp; + struct cr_regs *p = crs; + + pm_runtime_get_sync(obj->dev); + iotlb_lock_get(obj, &saved); + + for_each_iotlb_cr(obj, num, i, tmp) { + if (!iotlb_cr_valid(&tmp)) + continue; + *p++ = tmp; + } + + iotlb_lock_set(obj, &saved); + pm_runtime_put_sync(obj->dev); + + return p - crs; +} + +/** + * iotlb_dump_cr - Dump an iommu tlb entry into buf + * @obj: target iommu + * @cr: contents of cam and ram register + * @buf: output buffer + **/ +static ssize_t iotlb_dump_cr(struct omap_iommu *obj, struct cr_regs *cr, + char *buf) +{ + char *p = buf; + + /* FIXME: Need more detail analysis of cam/ram */ + p += sprintf(p, "%08x %08x %01x\n", cr->cam, cr->ram, + (cr->cam & MMU_CAM_P) ? 1 : 0); + + return p - buf; +} + +/** + * omap_dump_tlb_entries - dump cr arrays to given buffer + * @obj: target iommu + * @buf: output buffer + **/ +size_t omap_dump_tlb_entries(struct omap_iommu *obj, char *buf, ssize_t bytes) +{ + int i, num; + struct cr_regs *cr; + char *p = buf; + + num = bytes / sizeof(*cr); + num = min(obj->nr_tlb_entries, num); + + cr = kcalloc(num, sizeof(*cr), GFP_KERNEL); + if (!cr) + return 0; + + num = __dump_tlb_entries(obj, cr, num); + for (i = 0; i < num; i++) + p += iotlb_dump_cr(obj, cr + i, p); + kfree(cr); + + return p - buf; +} + +#endif /* CONFIG_OMAP_IOMMU_DEBUG */ + +/* + * H/W pagetable operations + */ +static void flush_iopgd_range(u32 *first, u32 *last) +{ + /* FIXME: L2 cache should be taken care of if it exists */ + do { + asm("mcr p15, 0, %0, c7, c10, 1 @ flush_pgd" + : : "r" (first)); + first += L1_CACHE_BYTES / sizeof(*first); + } while (first <= last); +} + +static void flush_iopte_range(u32 *first, u32 *last) +{ + /* FIXME: L2 cache should be taken care of if it exists */ + do { + asm("mcr p15, 0, %0, c7, c10, 1 @ flush_pte" + : : "r" (first)); + first += L1_CACHE_BYTES / sizeof(*first); + } while (first <= last); +} + +static void iopte_free(u32 *iopte) +{ + /* Note: freed iopte's must be clean ready for re-use */ + if (iopte) + kmem_cache_free(iopte_cachep, iopte); +} + +static u32 *iopte_alloc(struct omap_iommu *obj, u32 *iopgd, u32 da) +{ + u32 *iopte; + + /* a table has already existed */ + if (*iopgd) + goto pte_ready; + + /* + * do the allocation outside the page table lock + */ + spin_unlock(&obj->page_table_lock); + iopte = kmem_cache_zalloc(iopte_cachep, GFP_KERNEL); + spin_lock(&obj->page_table_lock); + + if (!*iopgd) { + if (!iopte) + return ERR_PTR(-ENOMEM); + + *iopgd = virt_to_phys(iopte) | IOPGD_TABLE; + flush_iopgd_range(iopgd, iopgd); + + dev_vdbg(obj->dev, "%s: a new pte:%p\n", __func__, iopte); + } else { + /* We raced, free the reduniovant table */ + iopte_free(iopte); + } + +pte_ready: + iopte = iopte_offset(iopgd, da); + + dev_vdbg(obj->dev, + "%s: da:%08x pgd:%p *pgd:%08x pte:%p *pte:%08x\n", + __func__, da, iopgd, *iopgd, iopte, *iopte); + + return iopte; +} + +static int iopgd_alloc_section(struct omap_iommu *obj, u32 da, u32 pa, u32 prot) +{ + u32 *iopgd = iopgd_offset(obj, da); + + if ((da | pa) & ~IOSECTION_MASK) { + dev_err(obj->dev, "%s: %08x:%08x should aligned on %08lx\n", + __func__, da, pa, IOSECTION_SIZE); + return -EINVAL; + } + + *iopgd = (pa & IOSECTION_MASK) | prot | IOPGD_SECTION; + flush_iopgd_range(iopgd, iopgd); + return 0; +} + +static int iopgd_alloc_super(struct omap_iommu *obj, u32 da, u32 pa, u32 prot) +{ + u32 *iopgd = iopgd_offset(obj, da); + int i; + + if ((da | pa) & ~IOSUPER_MASK) { + dev_err(obj->dev, "%s: %08x:%08x should aligned on %08lx\n", + __func__, da, pa, IOSUPER_SIZE); + return -EINVAL; + } + + for (i = 0; i < 16; i++) + *(iopgd + i) = (pa & IOSUPER_MASK) | prot | IOPGD_SUPER; + flush_iopgd_range(iopgd, iopgd + 15); + return 0; +} + +static int iopte_alloc_page(struct omap_iommu *obj, u32 da, u32 pa, u32 prot) +{ + u32 *iopgd = iopgd_offset(obj, da); + u32 *iopte = iopte_alloc(obj, iopgd, da); + + if (IS_ERR(iopte)) + return PTR_ERR(iopte); + + *iopte = (pa & IOPAGE_MASK) | prot | IOPTE_SMALL; + flush_iopte_range(iopte, iopte); + + dev_vdbg(obj->dev, "%s: da:%08x pa:%08x pte:%p *pte:%08x\n", + __func__, da, pa, iopte, *iopte); + + return 0; +} + +static int iopte_alloc_large(struct omap_iommu *obj, u32 da, u32 pa, u32 prot) +{ + u32 *iopgd = iopgd_offset(obj, da); + u32 *iopte = iopte_alloc(obj, iopgd, da); + int i; + + if ((da | pa) & ~IOLARGE_MASK) { + dev_err(obj->dev, "%s: %08x:%08x should aligned on %08lx\n", + __func__, da, pa, IOLARGE_SIZE); + return -EINVAL; + } + + if (IS_ERR(iopte)) + return PTR_ERR(iopte); + + for (i = 0; i < 16; i++) + *(iopte + i) = (pa & IOLARGE_MASK) | prot | IOPTE_LARGE; + flush_iopte_range(iopte, iopte + 15); + return 0; +} + +static int +iopgtable_store_entry_core(struct omap_iommu *obj, struct iotlb_entry *e) +{ + int (*fn)(struct omap_iommu *, u32, u32, u32); + u32 prot; + int err; + + if (!obj || !e) + return -EINVAL; + + switch (e->pgsz) { + case MMU_CAM_PGSZ_16M: + fn = iopgd_alloc_super; + break; + case MMU_CAM_PGSZ_1M: + fn = iopgd_alloc_section; + break; + case MMU_CAM_PGSZ_64K: + fn = iopte_alloc_large; + break; + case MMU_CAM_PGSZ_4K: + fn = iopte_alloc_page; + break; + default: + fn = NULL; + BUG(); + break; + } + + prot = get_iopte_attr(e); + + spin_lock(&obj->page_table_lock); + err = fn(obj, e->da, e->pa, prot); + spin_unlock(&obj->page_table_lock); + + return err; +} + +/** + * omap_iopgtable_store_entry - Make an iommu pte entry + * @obj: target iommu + * @e: an iommu tlb entry info + **/ +static int +omap_iopgtable_store_entry(struct omap_iommu *obj, struct iotlb_entry *e) +{ + int err; + + flush_iotlb_page(obj, e->da); + err = iopgtable_store_entry_core(obj, e); + if (!err) + prefetch_iotlb_entry(obj, e); + return err; +} + +/** + * iopgtable_lookup_entry - Lookup an iommu pte entry + * @obj: target iommu + * @da: iommu device virtual address + * @ppgd: iommu pgd entry pointer to be returned + * @ppte: iommu pte entry pointer to be returned + **/ +static void +iopgtable_lookup_entry(struct omap_iommu *obj, u32 da, u32 **ppgd, u32 **ppte) +{ + u32 *iopgd, *iopte = NULL; + + iopgd = iopgd_offset(obj, da); + if (!*iopgd) + goto out; + + if (iopgd_is_table(*iopgd)) + iopte = iopte_offset(iopgd, da); +out: + *ppgd = iopgd; + *ppte = iopte; +} + +static size_t iopgtable_clear_entry_core(struct omap_iommu *obj, u32 da) +{ + size_t bytes; + u32 *iopgd = iopgd_offset(obj, da); + int nent = 1; + + if (!*iopgd) + return 0; + + if (iopgd_is_table(*iopgd)) { + int i; + u32 *iopte = iopte_offset(iopgd, da); + + bytes = IOPTE_SIZE; + if (*iopte & IOPTE_LARGE) { + nent *= 16; + /* rewind to the 1st entry */ + iopte = iopte_offset(iopgd, (da & IOLARGE_MASK)); + } + bytes *= nent; + memset(iopte, 0, nent * sizeof(*iopte)); + flush_iopte_range(iopte, iopte + (nent - 1) * sizeof(*iopte)); + + /* + * do table walk to check if this table is necessary or not + */ + iopte = iopte_offset(iopgd, 0); + for (i = 0; i < PTRS_PER_IOPTE; i++) + if (iopte[i]) + goto out; + + iopte_free(iopte); + nent = 1; /* for the next L1 entry */ + } else { + bytes = IOPGD_SIZE; + if ((*iopgd & IOPGD_SUPER) == IOPGD_SUPER) { + nent *= 16; + /* rewind to the 1st entry */ + iopgd = iopgd_offset(obj, (da & IOSUPER_MASK)); + } + bytes *= nent; + } + memset(iopgd, 0, nent * sizeof(*iopgd)); + flush_iopgd_range(iopgd, iopgd + (nent - 1) * sizeof(*iopgd)); +out: + return bytes; +} + +/** + * iopgtable_clear_entry - Remove an iommu pte entry + * @obj: target iommu + * @da: iommu device virtual address + **/ +static size_t iopgtable_clear_entry(struct omap_iommu *obj, u32 da) +{ + size_t bytes; + + spin_lock(&obj->page_table_lock); + + bytes = iopgtable_clear_entry_core(obj, da); + flush_iotlb_page(obj, da); + + spin_unlock(&obj->page_table_lock); + + return bytes; +} + +static void iopgtable_clear_entry_all(struct omap_iommu *obj) +{ + int i; + + spin_lock(&obj->page_table_lock); + + for (i = 0; i < PTRS_PER_IOPGD; i++) { + u32 da; + u32 *iopgd; + + da = i << IOPGD_SHIFT; + iopgd = iopgd_offset(obj, da); + + if (!*iopgd) + continue; + + if (iopgd_is_table(*iopgd)) + iopte_free(iopte_offset(iopgd, 0)); + + *iopgd = 0; + flush_iopgd_range(iopgd, iopgd); + } + + flush_iotlb_all(obj); + + spin_unlock(&obj->page_table_lock); +} + +/* + * Device IOMMU generic operations + */ +static irqreturn_t iommu_fault_handler(int irq, void *data) +{ + u32 da, errs; + u32 *iopgd, *iopte; + struct omap_iommu *obj = data; + struct iommu_domain *domain = obj->domain; + struct omap_iommu_domain *omap_domain = to_omap_domain(domain); + + if (!omap_domain->iommu_dev) + return IRQ_NONE; + + errs = iommu_report_fault(obj, &da); + if (errs == 0) + return IRQ_HANDLED; + + /* Fault callback or TLB/PTE Dynamic loading */ + if (!report_iommu_fault(domain, obj->dev, da, 0)) + return IRQ_HANDLED; + + iommu_disable(obj); + + iopgd = iopgd_offset(obj, da); + + if (!iopgd_is_table(*iopgd)) { + dev_err(obj->dev, "%s: errs:0x%08x da:0x%08x pgd:0x%p *pgd:px%08x\n", + obj->name, errs, da, iopgd, *iopgd); + return IRQ_NONE; + } + + iopte = iopte_offset(iopgd, da); + + dev_err(obj->dev, "%s: errs:0x%08x da:0x%08x pgd:0x%p *pgd:0x%08x pte:0x%p *pte:0x%08x\n", + obj->name, errs, da, iopgd, *iopgd, iopte, *iopte); + + return IRQ_NONE; +} + +static int device_match_by_alias(struct device *dev, void *data) +{ + struct omap_iommu *obj = to_iommu(dev); + const char *name = data; + + pr_debug("%s: %s %s\n", __func__, obj->name, name); + + return strcmp(obj->name, name) == 0; +} + +/** + * omap_iommu_attach() - attach iommu device to an iommu domain + * @name: name of target omap iommu device + * @iopgd: page table + **/ +static struct omap_iommu *omap_iommu_attach(const char *name, u32 *iopgd) +{ + int err; + struct device *dev; + struct omap_iommu *obj; + + dev = driver_find_device(&omap_iommu_driver.driver, NULL, + (void *)name, + device_match_by_alias); + if (!dev) + return ERR_PTR(-ENODEV); + + obj = to_iommu(dev); + + spin_lock(&obj->iommu_lock); + + obj->iopgd = iopgd; + err = iommu_enable(obj); + if (err) + goto err_enable; + flush_iotlb_all(obj); + + spin_unlock(&obj->iommu_lock); + + dev_dbg(obj->dev, "%s: %s\n", __func__, obj->name); + return obj; + +err_enable: + spin_unlock(&obj->iommu_lock); + return ERR_PTR(err); +} + +/** + * omap_iommu_detach - release iommu device + * @obj: target iommu + **/ +static void omap_iommu_detach(struct omap_iommu *obj) +{ + if (!obj || IS_ERR(obj)) + return; + + spin_lock(&obj->iommu_lock); + + iommu_disable(obj); + obj->iopgd = NULL; + + spin_unlock(&obj->iommu_lock); + + dev_dbg(obj->dev, "%s: %s\n", __func__, obj->name); +} + +/* + * OMAP Device MMU(IOMMU) detection + */ +static int omap_iommu_probe(struct platform_device *pdev) +{ + int err = -ENODEV; + int irq; + struct omap_iommu *obj; + struct resource *res; + struct iommu_platform_data *pdata = dev_get_platdata(&pdev->dev); + struct device_node *of = pdev->dev.of_node; + + obj = devm_kzalloc(&pdev->dev, sizeof(*obj) + MMU_REG_SIZE, GFP_KERNEL); + if (!obj) + return -ENOMEM; + + if (of) { + obj->name = dev_name(&pdev->dev); + obj->nr_tlb_entries = 32; + err = of_property_read_u32(of, "ti,#tlb-entries", + &obj->nr_tlb_entries); + if (err && err != -EINVAL) + return err; + if (obj->nr_tlb_entries != 32 && obj->nr_tlb_entries != 8) + return -EINVAL; + if (of_find_property(of, "ti,iommu-bus-err-back", NULL)) + obj->has_bus_err_back = MMU_GP_REG_BUS_ERR_BACK_EN; + } else { + obj->nr_tlb_entries = pdata->nr_tlb_entries; + obj->name = pdata->name; + } + + obj->dev = &pdev->dev; + obj->ctx = (void *)obj + sizeof(*obj); + + spin_lock_init(&obj->iommu_lock); + spin_lock_init(&obj->page_table_lock); + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + obj->regbase = devm_ioremap_resource(obj->dev, res); + if (IS_ERR(obj->regbase)) + return PTR_ERR(obj->regbase); + + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return -ENODEV; + + err = devm_request_irq(obj->dev, irq, iommu_fault_handler, IRQF_SHARED, + dev_name(obj->dev), obj); + if (err < 0) + return err; + platform_set_drvdata(pdev, obj); + + pm_runtime_irq_safe(obj->dev); + pm_runtime_enable(obj->dev); + + omap_iommu_debugfs_add(obj); + + dev_info(&pdev->dev, "%s registered\n", obj->name); + return 0; +} + +static int omap_iommu_remove(struct platform_device *pdev) +{ + struct omap_iommu *obj = platform_get_drvdata(pdev); + + iopgtable_clear_entry_all(obj); + omap_iommu_debugfs_remove(obj); + + pm_runtime_disable(obj->dev); + + dev_info(&pdev->dev, "%s removed\n", obj->name); + return 0; +} + +static const struct of_device_id omap_iommu_of_match[] = { + { .compatible = "ti,omap2-iommu" }, + { .compatible = "ti,omap4-iommu" }, + { .compatible = "ti,dra7-iommu" }, + {}, +}; +MODULE_DEVICE_TABLE(of, omap_iommu_of_match); + +static struct platform_driver omap_iommu_driver = { + .probe = omap_iommu_probe, + .remove = omap_iommu_remove, + .driver = { + .name = "omap-iommu", + .of_match_table = of_match_ptr(omap_iommu_of_match), + }, +}; + +static void iopte_cachep_ctor(void *iopte) +{ + clean_dcache_area(iopte, IOPTE_TABLE_SIZE); +} + +static u32 iotlb_init_entry(struct iotlb_entry *e, u32 da, u32 pa, int pgsz) +{ + memset(e, 0, sizeof(*e)); + + e->da = da; + e->pa = pa; + e->valid = MMU_CAM_V; + e->pgsz = pgsz; + e->endian = MMU_RAM_ENDIAN_LITTLE; + e->elsz = MMU_RAM_ELSZ_8; + e->mixed = 0; + + return iopgsz_to_bytes(e->pgsz); +} + +static int omap_iommu_map(struct iommu_domain *domain, unsigned long da, + phys_addr_t pa, size_t bytes, int prot) +{ + struct omap_iommu_domain *omap_domain = to_omap_domain(domain); + struct omap_iommu *oiommu = omap_domain->iommu_dev; + struct device *dev = oiommu->dev; + struct iotlb_entry e; + int omap_pgsz; + u32 ret; + + omap_pgsz = bytes_to_iopgsz(bytes); + if (omap_pgsz < 0) { + dev_err(dev, "invalid size to map: %d\n", bytes); + return -EINVAL; + } + + dev_dbg(dev, "mapping da 0x%lx to pa %pa size 0x%x\n", da, &pa, bytes); + + iotlb_init_entry(&e, da, pa, omap_pgsz); + + ret = omap_iopgtable_store_entry(oiommu, &e); + if (ret) + dev_err(dev, "omap_iopgtable_store_entry failed: %d\n", ret); + + return ret; +} + +static size_t omap_iommu_unmap(struct iommu_domain *domain, unsigned long da, + size_t size) +{ + struct omap_iommu_domain *omap_domain = to_omap_domain(domain); + struct omap_iommu *oiommu = omap_domain->iommu_dev; + struct device *dev = oiommu->dev; + + dev_dbg(dev, "unmapping da 0x%lx size %u\n", da, size); + + return iopgtable_clear_entry(oiommu, da); +} + +static int +omap_iommu_attach_dev(struct iommu_domain *domain, struct device *dev) +{ + struct omap_iommu_domain *omap_domain = to_omap_domain(domain); + struct omap_iommu *oiommu; + struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; + int ret = 0; + + if (!arch_data || !arch_data->name) { + dev_err(dev, "device doesn't have an associated iommu\n"); + return -EINVAL; + } + + spin_lock(&omap_domain->lock); + + /* only a single device is supported per domain for now */ + if (omap_domain->iommu_dev) { + dev_err(dev, "iommu domain is already attached\n"); + ret = -EBUSY; + goto out; + } + + /* get a handle to and enable the omap iommu */ + oiommu = omap_iommu_attach(arch_data->name, omap_domain->pgtable); + if (IS_ERR(oiommu)) { + ret = PTR_ERR(oiommu); + dev_err(dev, "can't get omap iommu: %d\n", ret); + goto out; + } + + omap_domain->iommu_dev = arch_data->iommu_dev = oiommu; + omap_domain->dev = dev; + oiommu->domain = domain; + +out: + spin_unlock(&omap_domain->lock); + return ret; +} + +static void _omap_iommu_detach_dev(struct omap_iommu_domain *omap_domain, + struct device *dev) +{ + struct omap_iommu *oiommu = dev_to_omap_iommu(dev); + struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; + + /* only a single device is supported per domain for now */ + if (omap_domain->iommu_dev != oiommu) { + dev_err(dev, "invalid iommu device\n"); + return; + } + + iopgtable_clear_entry_all(oiommu); + + omap_iommu_detach(oiommu); + + omap_domain->iommu_dev = arch_data->iommu_dev = NULL; + omap_domain->dev = NULL; + oiommu->domain = NULL; +} + +static void omap_iommu_detach_dev(struct iommu_domain *domain, + struct device *dev) +{ + struct omap_iommu_domain *omap_domain = to_omap_domain(domain); + + spin_lock(&omap_domain->lock); + _omap_iommu_detach_dev(omap_domain, dev); + spin_unlock(&omap_domain->lock); +} + +static struct iommu_domain *omap_iommu_domain_alloc(unsigned type) +{ + struct omap_iommu_domain *omap_domain; + + if (type != IOMMU_DOMAIN_UNMANAGED) + return NULL; + + omap_domain = kzalloc(sizeof(*omap_domain), GFP_KERNEL); + if (!omap_domain) { + pr_err("kzalloc failed\n"); + goto out; + } + + omap_domain->pgtable = kzalloc(IOPGD_TABLE_SIZE, GFP_KERNEL); + if (!omap_domain->pgtable) { + pr_err("kzalloc failed\n"); + goto fail_nomem; + } + + /* + * should never fail, but please keep this around to ensure + * we keep the hardware happy + */ + BUG_ON(!IS_ALIGNED((long)omap_domain->pgtable, IOPGD_TABLE_SIZE)); + + clean_dcache_area(omap_domain->pgtable, IOPGD_TABLE_SIZE); + spin_lock_init(&omap_domain->lock); + + omap_domain->domain.geometry.aperture_start = 0; + omap_domain->domain.geometry.aperture_end = (1ULL << 32) - 1; + omap_domain->domain.geometry.force_aperture = true; + + return &omap_domain->domain; + +fail_nomem: + kfree(omap_domain); +out: + return NULL; +} + +static void omap_iommu_domain_free(struct iommu_domain *domain) +{ + struct omap_iommu_domain *omap_domain = to_omap_domain(domain); + + /* + * An iommu device is still attached + * (currently, only one device can be attached) ? + */ + if (omap_domain->iommu_dev) + _omap_iommu_detach_dev(omap_domain, omap_domain->dev); + + kfree(omap_domain->pgtable); + kfree(omap_domain); +} + +static phys_addr_t omap_iommu_iova_to_phys(struct iommu_domain *domain, + dma_addr_t da) +{ + struct omap_iommu_domain *omap_domain = to_omap_domain(domain); + struct omap_iommu *oiommu = omap_domain->iommu_dev; + struct device *dev = oiommu->dev; + u32 *pgd, *pte; + phys_addr_t ret = 0; + + iopgtable_lookup_entry(oiommu, da, &pgd, &pte); + + if (pte) { + if (iopte_is_small(*pte)) + ret = omap_iommu_translate(*pte, da, IOPTE_MASK); + else if (iopte_is_large(*pte)) + ret = omap_iommu_translate(*pte, da, IOLARGE_MASK); + else + dev_err(dev, "bogus pte 0x%x, da 0x%llx", *pte, + (unsigned long long)da); + } else { + if (iopgd_is_section(*pgd)) + ret = omap_iommu_translate(*pgd, da, IOSECTION_MASK); + else if (iopgd_is_super(*pgd)) + ret = omap_iommu_translate(*pgd, da, IOSUPER_MASK); + else + dev_err(dev, "bogus pgd 0x%x, da 0x%llx", *pgd, + (unsigned long long)da); + } + + return ret; +} + +static int omap_iommu_add_device(struct device *dev) +{ + struct omap_iommu_arch_data *arch_data; + struct device_node *np; + struct platform_device *pdev; + + /* + * Allocate the archdata iommu structure for DT-based devices. + * + * TODO: Simplify this when removing non-DT support completely from the + * IOMMU users. + */ + if (!dev->of_node) + return 0; + + np = of_parse_phandle(dev->of_node, "iommus", 0); + if (!np) + return 0; + + pdev = of_find_device_by_node(np); + if (WARN_ON(!pdev)) { + of_node_put(np); + return -EINVAL; + } + + arch_data = kzalloc(sizeof(*arch_data), GFP_KERNEL); + if (!arch_data) { + of_node_put(np); + return -ENOMEM; + } + + arch_data->name = kstrdup(dev_name(&pdev->dev), GFP_KERNEL); + dev->archdata.iommu = arch_data; + + of_node_put(np); + + return 0; +} + +static void omap_iommu_remove_device(struct device *dev) +{ + struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; + + if (!dev->of_node || !arch_data) + return; + + kfree(arch_data->name); + kfree(arch_data); +} + +static const struct iommu_ops omap_iommu_ops = { + .domain_alloc = omap_iommu_domain_alloc, + .domain_free = omap_iommu_domain_free, + .attach_dev = omap_iommu_attach_dev, + .detach_dev = omap_iommu_detach_dev, + .map = omap_iommu_map, + .unmap = omap_iommu_unmap, + .map_sg = default_iommu_map_sg, + .iova_to_phys = omap_iommu_iova_to_phys, + .add_device = omap_iommu_add_device, + .remove_device = omap_iommu_remove_device, + .pgsize_bitmap = OMAP_IOMMU_PGSIZES, +}; + +static int __init omap_iommu_init(void) +{ + struct kmem_cache *p; + const unsigned long flags = SLAB_HWCACHE_ALIGN; + size_t align = 1 << 10; /* L2 pagetable alignement */ + struct device_node *np; + + np = of_find_matching_node(NULL, omap_iommu_of_match); + if (!np) + return 0; + + of_node_put(np); + + p = kmem_cache_create("iopte_cache", IOPTE_TABLE_SIZE, align, flags, + iopte_cachep_ctor); + if (!p) + return -ENOMEM; + iopte_cachep = p; + + bus_set_iommu(&platform_bus_type, &omap_iommu_ops); + + omap_iommu_debugfs_init(); + + return platform_driver_register(&omap_iommu_driver); +} +/* must be ready before omap3isp is probed */ +subsys_initcall(omap_iommu_init); + +static void __exit omap_iommu_exit(void) +{ + kmem_cache_destroy(iopte_cachep); + + platform_driver_unregister(&omap_iommu_driver); + + omap_iommu_debugfs_exit(); +} +module_exit(omap_iommu_exit); + +MODULE_DESCRIPTION("omap iommu: tlb and pagetable primitives"); +MODULE_ALIAS("platform:omap-iommu"); +MODULE_AUTHOR("Hiroshi DOYU, Paul Mundt and Toshihiro Kobayashi"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/iommu/omap-iommu.h b/drivers/iommu/omap-iommu.h new file mode 100644 index 000000000..d736630df --- /dev/null +++ b/drivers/iommu/omap-iommu.h @@ -0,0 +1,225 @@ +/* + * omap iommu: main structures + * + * Copyright (C) 2008-2009 Nokia Corporation + * + * Written by Hiroshi DOYU <Hiroshi.DOYU@nokia.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _OMAP_IOMMU_H +#define _OMAP_IOMMU_H + +struct iotlb_entry { + u32 da; + u32 pa; + u32 pgsz, prsvd, valid; + union { + u16 ap; + struct { + u32 endian, elsz, mixed; + }; + }; +}; + +struct omap_iommu { + const char *name; + void __iomem *regbase; + struct device *dev; + struct iommu_domain *domain; + struct dentry *debug_dir; + + spinlock_t iommu_lock; /* global for this whole object */ + + /* + * We don't change iopgd for a situation like pgd for a task, + * but share it globally for each iommu. + */ + u32 *iopgd; + spinlock_t page_table_lock; /* protect iopgd */ + + int nr_tlb_entries; + + void *ctx; /* iommu context: registres saved area */ + + int has_bus_err_back; +}; + +struct cr_regs { + union { + struct { + u16 cam_l; + u16 cam_h; + }; + u32 cam; + }; + union { + struct { + u16 ram_l; + u16 ram_h; + }; + u32 ram; + }; +}; + +/** + * dev_to_omap_iommu() - retrieves an omap iommu object from a user device + * @dev: iommu client device + */ +static inline struct omap_iommu *dev_to_omap_iommu(struct device *dev) +{ + struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; + + return arch_data->iommu_dev; +} + +/* + * MMU Register offsets + */ +#define MMU_REVISION 0x00 +#define MMU_IRQSTATUS 0x18 +#define MMU_IRQENABLE 0x1c +#define MMU_WALKING_ST 0x40 +#define MMU_CNTL 0x44 +#define MMU_FAULT_AD 0x48 +#define MMU_TTB 0x4c +#define MMU_LOCK 0x50 +#define MMU_LD_TLB 0x54 +#define MMU_CAM 0x58 +#define MMU_RAM 0x5c +#define MMU_GFLUSH 0x60 +#define MMU_FLUSH_ENTRY 0x64 +#define MMU_READ_CAM 0x68 +#define MMU_READ_RAM 0x6c +#define MMU_EMU_FAULT_AD 0x70 +#define MMU_GP_REG 0x88 + +#define MMU_REG_SIZE 256 + +/* + * MMU Register bit definitions + */ +/* IRQSTATUS & IRQENABLE */ +#define MMU_IRQ_MULTIHITFAULT (1 << 4) +#define MMU_IRQ_TABLEWALKFAULT (1 << 3) +#define MMU_IRQ_EMUMISS (1 << 2) +#define MMU_IRQ_TRANSLATIONFAULT (1 << 1) +#define MMU_IRQ_TLBMISS (1 << 0) + +#define __MMU_IRQ_FAULT \ + (MMU_IRQ_MULTIHITFAULT | MMU_IRQ_EMUMISS | MMU_IRQ_TRANSLATIONFAULT) +#define MMU_IRQ_MASK \ + (__MMU_IRQ_FAULT | MMU_IRQ_TABLEWALKFAULT | MMU_IRQ_TLBMISS) +#define MMU_IRQ_TWL_MASK (__MMU_IRQ_FAULT | MMU_IRQ_TABLEWALKFAULT) +#define MMU_IRQ_TLB_MISS_MASK (__MMU_IRQ_FAULT | MMU_IRQ_TLBMISS) + +/* MMU_CNTL */ +#define MMU_CNTL_SHIFT 1 +#define MMU_CNTL_MASK (7 << MMU_CNTL_SHIFT) +#define MMU_CNTL_EML_TLB (1 << 3) +#define MMU_CNTL_TWL_EN (1 << 2) +#define MMU_CNTL_MMU_EN (1 << 1) + +/* CAM */ +#define MMU_CAM_VATAG_SHIFT 12 +#define MMU_CAM_VATAG_MASK \ + ((~0UL >> MMU_CAM_VATAG_SHIFT) << MMU_CAM_VATAG_SHIFT) +#define MMU_CAM_P (1 << 3) +#define MMU_CAM_V (1 << 2) +#define MMU_CAM_PGSZ_MASK 3 +#define MMU_CAM_PGSZ_1M (0 << 0) +#define MMU_CAM_PGSZ_64K (1 << 0) +#define MMU_CAM_PGSZ_4K (2 << 0) +#define MMU_CAM_PGSZ_16M (3 << 0) + +/* RAM */ +#define MMU_RAM_PADDR_SHIFT 12 +#define MMU_RAM_PADDR_MASK \ + ((~0UL >> MMU_RAM_PADDR_SHIFT) << MMU_RAM_PADDR_SHIFT) + +#define MMU_RAM_ENDIAN_SHIFT 9 +#define MMU_RAM_ENDIAN_MASK (1 << MMU_RAM_ENDIAN_SHIFT) +#define MMU_RAM_ENDIAN_LITTLE (0 << MMU_RAM_ENDIAN_SHIFT) +#define MMU_RAM_ENDIAN_BIG (1 << MMU_RAM_ENDIAN_SHIFT) + +#define MMU_RAM_ELSZ_SHIFT 7 +#define MMU_RAM_ELSZ_MASK (3 << MMU_RAM_ELSZ_SHIFT) +#define MMU_RAM_ELSZ_8 (0 << MMU_RAM_ELSZ_SHIFT) +#define MMU_RAM_ELSZ_16 (1 << MMU_RAM_ELSZ_SHIFT) +#define MMU_RAM_ELSZ_32 (2 << MMU_RAM_ELSZ_SHIFT) +#define MMU_RAM_ELSZ_NONE (3 << MMU_RAM_ELSZ_SHIFT) +#define MMU_RAM_MIXED_SHIFT 6 +#define MMU_RAM_MIXED_MASK (1 << MMU_RAM_MIXED_SHIFT) +#define MMU_RAM_MIXED MMU_RAM_MIXED_MASK + +#define MMU_GP_REG_BUS_ERR_BACK_EN 0x1 + +#define get_cam_va_mask(pgsz) \ + (((pgsz) == MMU_CAM_PGSZ_16M) ? 0xff000000 : \ + ((pgsz) == MMU_CAM_PGSZ_1M) ? 0xfff00000 : \ + ((pgsz) == MMU_CAM_PGSZ_64K) ? 0xffff0000 : \ + ((pgsz) == MMU_CAM_PGSZ_4K) ? 0xfffff000 : 0) + +/* + * utilities for super page(16MB, 1MB, 64KB and 4KB) + */ + +#define iopgsz_max(bytes) \ + (((bytes) >= SZ_16M) ? SZ_16M : \ + ((bytes) >= SZ_1M) ? SZ_1M : \ + ((bytes) >= SZ_64K) ? SZ_64K : \ + ((bytes) >= SZ_4K) ? SZ_4K : 0) + +#define bytes_to_iopgsz(bytes) \ + (((bytes) == SZ_16M) ? MMU_CAM_PGSZ_16M : \ + ((bytes) == SZ_1M) ? MMU_CAM_PGSZ_1M : \ + ((bytes) == SZ_64K) ? MMU_CAM_PGSZ_64K : \ + ((bytes) == SZ_4K) ? MMU_CAM_PGSZ_4K : -1) + +#define iopgsz_to_bytes(iopgsz) \ + (((iopgsz) == MMU_CAM_PGSZ_16M) ? SZ_16M : \ + ((iopgsz) == MMU_CAM_PGSZ_1M) ? SZ_1M : \ + ((iopgsz) == MMU_CAM_PGSZ_64K) ? SZ_64K : \ + ((iopgsz) == MMU_CAM_PGSZ_4K) ? SZ_4K : 0) + +#define iopgsz_ok(bytes) (bytes_to_iopgsz(bytes) >= 0) + +/* + * global functions + */ +#ifdef CONFIG_OMAP_IOMMU_DEBUG +extern ssize_t +omap_iommu_dump_ctx(struct omap_iommu *obj, char *buf, ssize_t len); +extern size_t +omap_dump_tlb_entries(struct omap_iommu *obj, char *buf, ssize_t len); + +void omap_iommu_debugfs_init(void); +void omap_iommu_debugfs_exit(void); + +void omap_iommu_debugfs_add(struct omap_iommu *obj); +void omap_iommu_debugfs_remove(struct omap_iommu *obj); +#else +static inline void omap_iommu_debugfs_init(void) { } +static inline void omap_iommu_debugfs_exit(void) { } + +static inline void omap_iommu_debugfs_add(struct omap_iommu *obj) { } +static inline void omap_iommu_debugfs_remove(struct omap_iommu *obj) { } +#endif + +/* + * register accessors + */ +static inline u32 iommu_read_reg(struct omap_iommu *obj, size_t offs) +{ + return __raw_readl(obj->regbase + offs); +} + +static inline void iommu_write_reg(struct omap_iommu *obj, u32 val, size_t offs) +{ + __raw_writel(val, obj->regbase + offs); +} + +#endif /* _OMAP_IOMMU_H */ diff --git a/drivers/iommu/omap-iopgtable.h b/drivers/iommu/omap-iopgtable.h new file mode 100644 index 000000000..f891683e3 --- /dev/null +++ b/drivers/iommu/omap-iopgtable.h @@ -0,0 +1,95 @@ +/* + * omap iommu: pagetable definitions + * + * Copyright (C) 2008-2010 Nokia Corporation + * + * Written by Hiroshi DOYU <Hiroshi.DOYU@nokia.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * "L2 table" address mask and size definitions. + */ +#define IOPGD_SHIFT 20 +#define IOPGD_SIZE (1UL << IOPGD_SHIFT) +#define IOPGD_MASK (~(IOPGD_SIZE - 1)) + +/* + * "section" address mask and size definitions. + */ +#define IOSECTION_SHIFT 20 +#define IOSECTION_SIZE (1UL << IOSECTION_SHIFT) +#define IOSECTION_MASK (~(IOSECTION_SIZE - 1)) + +/* + * "supersection" address mask and size definitions. + */ +#define IOSUPER_SHIFT 24 +#define IOSUPER_SIZE (1UL << IOSUPER_SHIFT) +#define IOSUPER_MASK (~(IOSUPER_SIZE - 1)) + +#define PTRS_PER_IOPGD (1UL << (32 - IOPGD_SHIFT)) +#define IOPGD_TABLE_SIZE (PTRS_PER_IOPGD * sizeof(u32)) + +/* + * "small page" address mask and size definitions. + */ +#define IOPTE_SHIFT 12 +#define IOPTE_SIZE (1UL << IOPTE_SHIFT) +#define IOPTE_MASK (~(IOPTE_SIZE - 1)) + +/* + * "large page" address mask and size definitions. + */ +#define IOLARGE_SHIFT 16 +#define IOLARGE_SIZE (1UL << IOLARGE_SHIFT) +#define IOLARGE_MASK (~(IOLARGE_SIZE - 1)) + +#define PTRS_PER_IOPTE (1UL << (IOPGD_SHIFT - IOPTE_SHIFT)) +#define IOPTE_TABLE_SIZE (PTRS_PER_IOPTE * sizeof(u32)) + +#define IOPAGE_MASK IOPTE_MASK + +/** + * omap_iommu_translate() - va to pa translation + * @d: omap iommu descriptor + * @va: virtual address + * @mask: omap iommu descriptor mask + * + * va to pa translation + */ +static inline phys_addr_t omap_iommu_translate(u32 d, u32 va, u32 mask) +{ + return (d & mask) | (va & (~mask)); +} + +/* + * some descriptor attributes. + */ +#define IOPGD_TABLE (1 << 0) +#define IOPGD_SECTION (2 << 0) +#define IOPGD_SUPER (1 << 18 | 2 << 0) + +#define iopgd_is_table(x) (((x) & 3) == IOPGD_TABLE) +#define iopgd_is_section(x) (((x) & (1 << 18 | 3)) == IOPGD_SECTION) +#define iopgd_is_super(x) (((x) & (1 << 18 | 3)) == IOPGD_SUPER) + +#define IOPTE_SMALL (2 << 0) +#define IOPTE_LARGE (1 << 0) + +#define iopte_is_small(x) (((x) & 2) == IOPTE_SMALL) +#define iopte_is_large(x) (((x) & 3) == IOPTE_LARGE) + +/* to find an entry in a page-table-directory */ +#define iopgd_index(da) (((da) >> IOPGD_SHIFT) & (PTRS_PER_IOPGD - 1)) +#define iopgd_offset(obj, da) ((obj)->iopgd + iopgd_index(da)) + +#define iopgd_page_paddr(iopgd) (*iopgd & ~((1 << 10) - 1)) +#define iopgd_page_vaddr(iopgd) ((u32 *)phys_to_virt(iopgd_page_paddr(iopgd))) + +/* to find an entry in the second-level page table. */ +#define iopte_index(da) (((da) >> IOPTE_SHIFT) & (PTRS_PER_IOPTE - 1)) +#define iopte_offset(iopgd, da) (iopgd_page_vaddr(iopgd) + iopte_index(da)) diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c new file mode 100644 index 000000000..cab214544 --- /dev/null +++ b/drivers/iommu/rockchip-iommu.c @@ -0,0 +1,1050 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <asm/cacheflush.h> +#include <asm/pgtable.h> +#include <linux/compiler.h> +#include <linux/delay.h> +#include <linux/device.h> +#include <linux/errno.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/iommu.h> +#include <linux/jiffies.h> +#include <linux/list.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_platform.h> +#include <linux/platform_device.h> +#include <linux/slab.h> +#include <linux/spinlock.h> + +/** MMU register offsets */ +#define RK_MMU_DTE_ADDR 0x00 /* Directory table address */ +#define RK_MMU_STATUS 0x04 +#define RK_MMU_COMMAND 0x08 +#define RK_MMU_PAGE_FAULT_ADDR 0x0C /* IOVA of last page fault */ +#define RK_MMU_ZAP_ONE_LINE 0x10 /* Shootdown one IOTLB entry */ +#define RK_MMU_INT_RAWSTAT 0x14 /* IRQ status ignoring mask */ +#define RK_MMU_INT_CLEAR 0x18 /* Acknowledge and re-arm irq */ +#define RK_MMU_INT_MASK 0x1C /* IRQ enable */ +#define RK_MMU_INT_STATUS 0x20 /* IRQ status after masking */ +#define RK_MMU_AUTO_GATING 0x24 + +#define DTE_ADDR_DUMMY 0xCAFEBABE +#define FORCE_RESET_TIMEOUT 100 /* ms */ + +/* RK_MMU_STATUS fields */ +#define RK_MMU_STATUS_PAGING_ENABLED BIT(0) +#define RK_MMU_STATUS_PAGE_FAULT_ACTIVE BIT(1) +#define RK_MMU_STATUS_STALL_ACTIVE BIT(2) +#define RK_MMU_STATUS_IDLE BIT(3) +#define RK_MMU_STATUS_REPLAY_BUFFER_EMPTY BIT(4) +#define RK_MMU_STATUS_PAGE_FAULT_IS_WRITE BIT(5) +#define RK_MMU_STATUS_STALL_NOT_ACTIVE BIT(31) + +/* RK_MMU_COMMAND command values */ +#define RK_MMU_CMD_ENABLE_PAGING 0 /* Enable memory translation */ +#define RK_MMU_CMD_DISABLE_PAGING 1 /* Disable memory translation */ +#define RK_MMU_CMD_ENABLE_STALL 2 /* Stall paging to allow other cmds */ +#define RK_MMU_CMD_DISABLE_STALL 3 /* Stop stall re-enables paging */ +#define RK_MMU_CMD_ZAP_CACHE 4 /* Shoot down entire IOTLB */ +#define RK_MMU_CMD_PAGE_FAULT_DONE 5 /* Clear page fault */ +#define RK_MMU_CMD_FORCE_RESET 6 /* Reset all registers */ + +/* RK_MMU_INT_* register fields */ +#define RK_MMU_IRQ_PAGE_FAULT 0x01 /* page fault */ +#define RK_MMU_IRQ_BUS_ERROR 0x02 /* bus read error */ +#define RK_MMU_IRQ_MASK (RK_MMU_IRQ_PAGE_FAULT | RK_MMU_IRQ_BUS_ERROR) + +#define NUM_DT_ENTRIES 1024 +#define NUM_PT_ENTRIES 1024 + +#define SPAGE_ORDER 12 +#define SPAGE_SIZE (1 << SPAGE_ORDER) + + /* + * Support mapping any size that fits in one page table: + * 4 KiB to 4 MiB + */ +#define RK_IOMMU_PGSIZE_BITMAP 0x007ff000 + +#define IOMMU_REG_POLL_COUNT_FAST 1000 + +struct rk_iommu_domain { + struct list_head iommus; + u32 *dt; /* page directory table */ + spinlock_t iommus_lock; /* lock for iommus list */ + spinlock_t dt_lock; /* lock for modifying page directory table */ + + struct iommu_domain domain; +}; + +struct rk_iommu { + struct device *dev; + void __iomem *base; + int irq; + struct list_head node; /* entry in rk_iommu_domain.iommus */ + struct iommu_domain *domain; /* domain to which iommu is attached */ +}; + +static inline void rk_table_flush(u32 *va, unsigned int count) +{ + phys_addr_t pa_start = virt_to_phys(va); + phys_addr_t pa_end = virt_to_phys(va + count); + size_t size = pa_end - pa_start; + + __cpuc_flush_dcache_area(va, size); + outer_flush_range(pa_start, pa_end); +} + +static struct rk_iommu_domain *to_rk_domain(struct iommu_domain *dom) +{ + return container_of(dom, struct rk_iommu_domain, domain); +} + +/** + * Inspired by _wait_for in intel_drv.h + * This is NOT safe for use in interrupt context. + * + * Note that it's important that we check the condition again after having + * timed out, since the timeout could be due to preemption or similar and + * we've never had a chance to check the condition before the timeout. + */ +#define rk_wait_for(COND, MS) ({ \ + unsigned long timeout__ = jiffies + msecs_to_jiffies(MS) + 1; \ + int ret__ = 0; \ + while (!(COND)) { \ + if (time_after(jiffies, timeout__)) { \ + ret__ = (COND) ? 0 : -ETIMEDOUT; \ + break; \ + } \ + usleep_range(50, 100); \ + } \ + ret__; \ +}) + +/* + * The Rockchip rk3288 iommu uses a 2-level page table. + * The first level is the "Directory Table" (DT). + * The DT consists of 1024 4-byte Directory Table Entries (DTEs), each pointing + * to a "Page Table". + * The second level is the 1024 Page Tables (PT). + * Each PT consists of 1024 4-byte Page Table Entries (PTEs), each pointing to + * a 4 KB page of physical memory. + * + * The DT and each PT fits in a single 4 KB page (4-bytes * 1024 entries). + * Each iommu device has a MMU_DTE_ADDR register that contains the physical + * address of the start of the DT page. + * + * The structure of the page table is as follows: + * + * DT + * MMU_DTE_ADDR -> +-----+ + * | | + * +-----+ PT + * | DTE | -> +-----+ + * +-----+ | | Memory + * | | +-----+ Page + * | | | PTE | -> +-----+ + * +-----+ +-----+ | | + * | | | | + * | | | | + * +-----+ | | + * | | + * | | + * +-----+ + */ + +/* + * Each DTE has a PT address and a valid bit: + * +---------------------+-----------+-+ + * | PT address | Reserved |V| + * +---------------------+-----------+-+ + * 31:12 - PT address (PTs always starts on a 4 KB boundary) + * 11: 1 - Reserved + * 0 - 1 if PT @ PT address is valid + */ +#define RK_DTE_PT_ADDRESS_MASK 0xfffff000 +#define RK_DTE_PT_VALID BIT(0) + +static inline phys_addr_t rk_dte_pt_address(u32 dte) +{ + return (phys_addr_t)dte & RK_DTE_PT_ADDRESS_MASK; +} + +static inline bool rk_dte_is_pt_valid(u32 dte) +{ + return dte & RK_DTE_PT_VALID; +} + +static u32 rk_mk_dte(u32 *pt) +{ + phys_addr_t pt_phys = virt_to_phys(pt); + return (pt_phys & RK_DTE_PT_ADDRESS_MASK) | RK_DTE_PT_VALID; +} + +/* + * Each PTE has a Page address, some flags and a valid bit: + * +---------------------+---+-------+-+ + * | Page address |Rsv| Flags |V| + * +---------------------+---+-------+-+ + * 31:12 - Page address (Pages always start on a 4 KB boundary) + * 11: 9 - Reserved + * 8: 1 - Flags + * 8 - Read allocate - allocate cache space on read misses + * 7 - Read cache - enable cache & prefetch of data + * 6 - Write buffer - enable delaying writes on their way to memory + * 5 - Write allocate - allocate cache space on write misses + * 4 - Write cache - different writes can be merged together + * 3 - Override cache attributes + * if 1, bits 4-8 control cache attributes + * if 0, the system bus defaults are used + * 2 - Writable + * 1 - Readable + * 0 - 1 if Page @ Page address is valid + */ +#define RK_PTE_PAGE_ADDRESS_MASK 0xfffff000 +#define RK_PTE_PAGE_FLAGS_MASK 0x000001fe +#define RK_PTE_PAGE_WRITABLE BIT(2) +#define RK_PTE_PAGE_READABLE BIT(1) +#define RK_PTE_PAGE_VALID BIT(0) + +static inline phys_addr_t rk_pte_page_address(u32 pte) +{ + return (phys_addr_t)pte & RK_PTE_PAGE_ADDRESS_MASK; +} + +static inline bool rk_pte_is_page_valid(u32 pte) +{ + return pte & RK_PTE_PAGE_VALID; +} + +/* TODO: set cache flags per prot IOMMU_CACHE */ +static u32 rk_mk_pte(phys_addr_t page, int prot) +{ + u32 flags = 0; + flags |= (prot & IOMMU_READ) ? RK_PTE_PAGE_READABLE : 0; + flags |= (prot & IOMMU_WRITE) ? RK_PTE_PAGE_WRITABLE : 0; + page &= RK_PTE_PAGE_ADDRESS_MASK; + return page | flags | RK_PTE_PAGE_VALID; +} + +static u32 rk_mk_pte_invalid(u32 pte) +{ + return pte & ~RK_PTE_PAGE_VALID; +} + +/* + * rk3288 iova (IOMMU Virtual Address) format + * 31 22.21 12.11 0 + * +-----------+-----------+-------------+ + * | DTE index | PTE index | Page offset | + * +-----------+-----------+-------------+ + * 31:22 - DTE index - index of DTE in DT + * 21:12 - PTE index - index of PTE in PT @ DTE.pt_address + * 11: 0 - Page offset - offset into page @ PTE.page_address + */ +#define RK_IOVA_DTE_MASK 0xffc00000 +#define RK_IOVA_DTE_SHIFT 22 +#define RK_IOVA_PTE_MASK 0x003ff000 +#define RK_IOVA_PTE_SHIFT 12 +#define RK_IOVA_PAGE_MASK 0x00000fff +#define RK_IOVA_PAGE_SHIFT 0 + +static u32 rk_iova_dte_index(dma_addr_t iova) +{ + return (u32)(iova & RK_IOVA_DTE_MASK) >> RK_IOVA_DTE_SHIFT; +} + +static u32 rk_iova_pte_index(dma_addr_t iova) +{ + return (u32)(iova & RK_IOVA_PTE_MASK) >> RK_IOVA_PTE_SHIFT; +} + +static u32 rk_iova_page_offset(dma_addr_t iova) +{ + return (u32)(iova & RK_IOVA_PAGE_MASK) >> RK_IOVA_PAGE_SHIFT; +} + +static u32 rk_iommu_read(struct rk_iommu *iommu, u32 offset) +{ + return readl(iommu->base + offset); +} + +static void rk_iommu_write(struct rk_iommu *iommu, u32 offset, u32 value) +{ + writel(value, iommu->base + offset); +} + +static void rk_iommu_command(struct rk_iommu *iommu, u32 command) +{ + writel(command, iommu->base + RK_MMU_COMMAND); +} + +static void rk_iommu_zap_lines(struct rk_iommu *iommu, dma_addr_t iova, + size_t size) +{ + dma_addr_t iova_end = iova + size; + /* + * TODO(djkurtz): Figure out when it is more efficient to shootdown the + * entire iotlb rather than iterate over individual iovas. + */ + for (; iova < iova_end; iova += SPAGE_SIZE) + rk_iommu_write(iommu, RK_MMU_ZAP_ONE_LINE, iova); +} + +static bool rk_iommu_is_stall_active(struct rk_iommu *iommu) +{ + return rk_iommu_read(iommu, RK_MMU_STATUS) & RK_MMU_STATUS_STALL_ACTIVE; +} + +static bool rk_iommu_is_paging_enabled(struct rk_iommu *iommu) +{ + return rk_iommu_read(iommu, RK_MMU_STATUS) & + RK_MMU_STATUS_PAGING_ENABLED; +} + +static int rk_iommu_enable_stall(struct rk_iommu *iommu) +{ + int ret; + + if (rk_iommu_is_stall_active(iommu)) + return 0; + + /* Stall can only be enabled if paging is enabled */ + if (!rk_iommu_is_paging_enabled(iommu)) + return 0; + + rk_iommu_command(iommu, RK_MMU_CMD_ENABLE_STALL); + + ret = rk_wait_for(rk_iommu_is_stall_active(iommu), 1); + if (ret) + dev_err(iommu->dev, "Enable stall request timed out, status: %#08x\n", + rk_iommu_read(iommu, RK_MMU_STATUS)); + + return ret; +} + +static int rk_iommu_disable_stall(struct rk_iommu *iommu) +{ + int ret; + + if (!rk_iommu_is_stall_active(iommu)) + return 0; + + rk_iommu_command(iommu, RK_MMU_CMD_DISABLE_STALL); + + ret = rk_wait_for(!rk_iommu_is_stall_active(iommu), 1); + if (ret) + dev_err(iommu->dev, "Disable stall request timed out, status: %#08x\n", + rk_iommu_read(iommu, RK_MMU_STATUS)); + + return ret; +} + +static int rk_iommu_enable_paging(struct rk_iommu *iommu) +{ + int ret; + + if (rk_iommu_is_paging_enabled(iommu)) + return 0; + + rk_iommu_command(iommu, RK_MMU_CMD_ENABLE_PAGING); + + ret = rk_wait_for(rk_iommu_is_paging_enabled(iommu), 1); + if (ret) + dev_err(iommu->dev, "Enable paging request timed out, status: %#08x\n", + rk_iommu_read(iommu, RK_MMU_STATUS)); + + return ret; +} + +static int rk_iommu_disable_paging(struct rk_iommu *iommu) +{ + int ret; + + if (!rk_iommu_is_paging_enabled(iommu)) + return 0; + + rk_iommu_command(iommu, RK_MMU_CMD_DISABLE_PAGING); + + ret = rk_wait_for(!rk_iommu_is_paging_enabled(iommu), 1); + if (ret) + dev_err(iommu->dev, "Disable paging request timed out, status: %#08x\n", + rk_iommu_read(iommu, RK_MMU_STATUS)); + + return ret; +} + +static int rk_iommu_force_reset(struct rk_iommu *iommu) +{ + int ret; + u32 dte_addr; + + /* + * Check if register DTE_ADDR is working by writing DTE_ADDR_DUMMY + * and verifying that upper 5 nybbles are read back. + */ + rk_iommu_write(iommu, RK_MMU_DTE_ADDR, DTE_ADDR_DUMMY); + + dte_addr = rk_iommu_read(iommu, RK_MMU_DTE_ADDR); + if (dte_addr != (DTE_ADDR_DUMMY & RK_DTE_PT_ADDRESS_MASK)) { + dev_err(iommu->dev, "Error during raw reset. MMU_DTE_ADDR is not functioning\n"); + return -EFAULT; + } + + rk_iommu_command(iommu, RK_MMU_CMD_FORCE_RESET); + + ret = rk_wait_for(rk_iommu_read(iommu, RK_MMU_DTE_ADDR) == 0x00000000, + FORCE_RESET_TIMEOUT); + if (ret) + dev_err(iommu->dev, "FORCE_RESET command timed out\n"); + + return ret; +} + +static void log_iova(struct rk_iommu *iommu, dma_addr_t iova) +{ + u32 dte_index, pte_index, page_offset; + u32 mmu_dte_addr; + phys_addr_t mmu_dte_addr_phys, dte_addr_phys; + u32 *dte_addr; + u32 dte; + phys_addr_t pte_addr_phys = 0; + u32 *pte_addr = NULL; + u32 pte = 0; + phys_addr_t page_addr_phys = 0; + u32 page_flags = 0; + + dte_index = rk_iova_dte_index(iova); + pte_index = rk_iova_pte_index(iova); + page_offset = rk_iova_page_offset(iova); + + mmu_dte_addr = rk_iommu_read(iommu, RK_MMU_DTE_ADDR); + mmu_dte_addr_phys = (phys_addr_t)mmu_dte_addr; + + dte_addr_phys = mmu_dte_addr_phys + (4 * dte_index); + dte_addr = phys_to_virt(dte_addr_phys); + dte = *dte_addr; + + if (!rk_dte_is_pt_valid(dte)) + goto print_it; + + pte_addr_phys = rk_dte_pt_address(dte) + (pte_index * 4); + pte_addr = phys_to_virt(pte_addr_phys); + pte = *pte_addr; + + if (!rk_pte_is_page_valid(pte)) + goto print_it; + + page_addr_phys = rk_pte_page_address(pte) + page_offset; + page_flags = pte & RK_PTE_PAGE_FLAGS_MASK; + +print_it: + dev_err(iommu->dev, "iova = %pad: dte_index: %#03x pte_index: %#03x page_offset: %#03x\n", + &iova, dte_index, pte_index, page_offset); + dev_err(iommu->dev, "mmu_dte_addr: %pa dte@%pa: %#08x valid: %u pte@%pa: %#08x valid: %u page@%pa flags: %#03x\n", + &mmu_dte_addr_phys, &dte_addr_phys, dte, + rk_dte_is_pt_valid(dte), &pte_addr_phys, pte, + rk_pte_is_page_valid(pte), &page_addr_phys, page_flags); +} + +static irqreturn_t rk_iommu_irq(int irq, void *dev_id) +{ + struct rk_iommu *iommu = dev_id; + u32 status; + u32 int_status; + dma_addr_t iova; + + int_status = rk_iommu_read(iommu, RK_MMU_INT_STATUS); + if (int_status == 0) + return IRQ_NONE; + + iova = rk_iommu_read(iommu, RK_MMU_PAGE_FAULT_ADDR); + + if (int_status & RK_MMU_IRQ_PAGE_FAULT) { + int flags; + + status = rk_iommu_read(iommu, RK_MMU_STATUS); + flags = (status & RK_MMU_STATUS_PAGE_FAULT_IS_WRITE) ? + IOMMU_FAULT_WRITE : IOMMU_FAULT_READ; + + dev_err(iommu->dev, "Page fault at %pad of type %s\n", + &iova, + (flags == IOMMU_FAULT_WRITE) ? "write" : "read"); + + log_iova(iommu, iova); + + /* + * Report page fault to any installed handlers. + * Ignore the return code, though, since we always zap cache + * and clear the page fault anyway. + */ + if (iommu->domain) + report_iommu_fault(iommu->domain, iommu->dev, iova, + flags); + else + dev_err(iommu->dev, "Page fault while iommu not attached to domain?\n"); + + rk_iommu_command(iommu, RK_MMU_CMD_ZAP_CACHE); + rk_iommu_command(iommu, RK_MMU_CMD_PAGE_FAULT_DONE); + } + + if (int_status & RK_MMU_IRQ_BUS_ERROR) + dev_err(iommu->dev, "BUS_ERROR occurred at %pad\n", &iova); + + if (int_status & ~RK_MMU_IRQ_MASK) + dev_err(iommu->dev, "unexpected int_status: %#08x\n", + int_status); + + rk_iommu_write(iommu, RK_MMU_INT_CLEAR, int_status); + + return IRQ_HANDLED; +} + +static phys_addr_t rk_iommu_iova_to_phys(struct iommu_domain *domain, + dma_addr_t iova) +{ + struct rk_iommu_domain *rk_domain = to_rk_domain(domain); + unsigned long flags; + phys_addr_t pt_phys, phys = 0; + u32 dte, pte; + u32 *page_table; + + spin_lock_irqsave(&rk_domain->dt_lock, flags); + + dte = rk_domain->dt[rk_iova_dte_index(iova)]; + if (!rk_dte_is_pt_valid(dte)) + goto out; + + pt_phys = rk_dte_pt_address(dte); + page_table = (u32 *)phys_to_virt(pt_phys); + pte = page_table[rk_iova_pte_index(iova)]; + if (!rk_pte_is_page_valid(pte)) + goto out; + + phys = rk_pte_page_address(pte) + rk_iova_page_offset(iova); +out: + spin_unlock_irqrestore(&rk_domain->dt_lock, flags); + + return phys; +} + +static void rk_iommu_zap_iova(struct rk_iommu_domain *rk_domain, + dma_addr_t iova, size_t size) +{ + struct list_head *pos; + unsigned long flags; + + /* shootdown these iova from all iommus using this domain */ + spin_lock_irqsave(&rk_domain->iommus_lock, flags); + list_for_each(pos, &rk_domain->iommus) { + struct rk_iommu *iommu; + iommu = list_entry(pos, struct rk_iommu, node); + rk_iommu_zap_lines(iommu, iova, size); + } + spin_unlock_irqrestore(&rk_domain->iommus_lock, flags); +} + +static u32 *rk_dte_get_page_table(struct rk_iommu_domain *rk_domain, + dma_addr_t iova) +{ + u32 *page_table, *dte_addr; + u32 dte; + phys_addr_t pt_phys; + + assert_spin_locked(&rk_domain->dt_lock); + + dte_addr = &rk_domain->dt[rk_iova_dte_index(iova)]; + dte = *dte_addr; + if (rk_dte_is_pt_valid(dte)) + goto done; + + page_table = (u32 *)get_zeroed_page(GFP_ATOMIC | GFP_DMA32); + if (!page_table) + return ERR_PTR(-ENOMEM); + + dte = rk_mk_dte(page_table); + *dte_addr = dte; + + rk_table_flush(page_table, NUM_PT_ENTRIES); + rk_table_flush(dte_addr, 1); + + /* + * Zap the first iova of newly allocated page table so iommu evicts + * old cached value of new dte from the iotlb. + */ + rk_iommu_zap_iova(rk_domain, iova, SPAGE_SIZE); + +done: + pt_phys = rk_dte_pt_address(dte); + return (u32 *)phys_to_virt(pt_phys); +} + +static size_t rk_iommu_unmap_iova(struct rk_iommu_domain *rk_domain, + u32 *pte_addr, dma_addr_t iova, size_t size) +{ + unsigned int pte_count; + unsigned int pte_total = size / SPAGE_SIZE; + + assert_spin_locked(&rk_domain->dt_lock); + + for (pte_count = 0; pte_count < pte_total; pte_count++) { + u32 pte = pte_addr[pte_count]; + if (!rk_pte_is_page_valid(pte)) + break; + + pte_addr[pte_count] = rk_mk_pte_invalid(pte); + } + + rk_table_flush(pte_addr, pte_count); + + return pte_count * SPAGE_SIZE; +} + +static int rk_iommu_map_iova(struct rk_iommu_domain *rk_domain, u32 *pte_addr, + dma_addr_t iova, phys_addr_t paddr, size_t size, + int prot) +{ + unsigned int pte_count; + unsigned int pte_total = size / SPAGE_SIZE; + phys_addr_t page_phys; + + assert_spin_locked(&rk_domain->dt_lock); + + for (pte_count = 0; pte_count < pte_total; pte_count++) { + u32 pte = pte_addr[pte_count]; + + if (rk_pte_is_page_valid(pte)) + goto unwind; + + pte_addr[pte_count] = rk_mk_pte(paddr, prot); + + paddr += SPAGE_SIZE; + } + + rk_table_flush(pte_addr, pte_count); + + return 0; +unwind: + /* Unmap the range of iovas that we just mapped */ + rk_iommu_unmap_iova(rk_domain, pte_addr, iova, pte_count * SPAGE_SIZE); + + iova += pte_count * SPAGE_SIZE; + page_phys = rk_pte_page_address(pte_addr[pte_count]); + pr_err("iova: %pad already mapped to %pa cannot remap to phys: %pa prot: %#x\n", + &iova, &page_phys, &paddr, prot); + + return -EADDRINUSE; +} + +static int rk_iommu_map(struct iommu_domain *domain, unsigned long _iova, + phys_addr_t paddr, size_t size, int prot) +{ + struct rk_iommu_domain *rk_domain = to_rk_domain(domain); + unsigned long flags; + dma_addr_t iova = (dma_addr_t)_iova; + u32 *page_table, *pte_addr; + int ret; + + spin_lock_irqsave(&rk_domain->dt_lock, flags); + + /* + * pgsize_bitmap specifies iova sizes that fit in one page table + * (1024 4-KiB pages = 4 MiB). + * So, size will always be 4096 <= size <= 4194304. + * Since iommu_map() guarantees that both iova and size will be + * aligned, we will always only be mapping from a single dte here. + */ + page_table = rk_dte_get_page_table(rk_domain, iova); + if (IS_ERR(page_table)) { + spin_unlock_irqrestore(&rk_domain->dt_lock, flags); + return PTR_ERR(page_table); + } + + pte_addr = &page_table[rk_iova_pte_index(iova)]; + ret = rk_iommu_map_iova(rk_domain, pte_addr, iova, paddr, size, prot); + spin_unlock_irqrestore(&rk_domain->dt_lock, flags); + + return ret; +} + +static size_t rk_iommu_unmap(struct iommu_domain *domain, unsigned long _iova, + size_t size) +{ + struct rk_iommu_domain *rk_domain = to_rk_domain(domain); + unsigned long flags; + dma_addr_t iova = (dma_addr_t)_iova; + phys_addr_t pt_phys; + u32 dte; + u32 *pte_addr; + size_t unmap_size; + + spin_lock_irqsave(&rk_domain->dt_lock, flags); + + /* + * pgsize_bitmap specifies iova sizes that fit in one page table + * (1024 4-KiB pages = 4 MiB). + * So, size will always be 4096 <= size <= 4194304. + * Since iommu_unmap() guarantees that both iova and size will be + * aligned, we will always only be unmapping from a single dte here. + */ + dte = rk_domain->dt[rk_iova_dte_index(iova)]; + /* Just return 0 if iova is unmapped */ + if (!rk_dte_is_pt_valid(dte)) { + spin_unlock_irqrestore(&rk_domain->dt_lock, flags); + return 0; + } + + pt_phys = rk_dte_pt_address(dte); + pte_addr = (u32 *)phys_to_virt(pt_phys) + rk_iova_pte_index(iova); + unmap_size = rk_iommu_unmap_iova(rk_domain, pte_addr, iova, size); + + spin_unlock_irqrestore(&rk_domain->dt_lock, flags); + + /* Shootdown iotlb entries for iova range that was just unmapped */ + rk_iommu_zap_iova(rk_domain, iova, unmap_size); + + return unmap_size; +} + +static struct rk_iommu *rk_iommu_from_dev(struct device *dev) +{ + struct iommu_group *group; + struct device *iommu_dev; + struct rk_iommu *rk_iommu; + + group = iommu_group_get(dev); + if (!group) + return NULL; + iommu_dev = iommu_group_get_iommudata(group); + rk_iommu = dev_get_drvdata(iommu_dev); + iommu_group_put(group); + + return rk_iommu; +} + +static int rk_iommu_attach_device(struct iommu_domain *domain, + struct device *dev) +{ + struct rk_iommu *iommu; + struct rk_iommu_domain *rk_domain = to_rk_domain(domain); + unsigned long flags; + int ret; + phys_addr_t dte_addr; + + /* + * Allow 'virtual devices' (e.g., drm) to attach to domain. + * Such a device does not belong to an iommu group. + */ + iommu = rk_iommu_from_dev(dev); + if (!iommu) + return 0; + + ret = rk_iommu_enable_stall(iommu); + if (ret) + return ret; + + ret = rk_iommu_force_reset(iommu); + if (ret) + return ret; + + iommu->domain = domain; + + ret = devm_request_irq(dev, iommu->irq, rk_iommu_irq, + IRQF_SHARED, dev_name(dev), iommu); + if (ret) + return ret; + + dte_addr = virt_to_phys(rk_domain->dt); + rk_iommu_write(iommu, RK_MMU_DTE_ADDR, dte_addr); + rk_iommu_command(iommu, RK_MMU_CMD_ZAP_CACHE); + rk_iommu_write(iommu, RK_MMU_INT_MASK, RK_MMU_IRQ_MASK); + + ret = rk_iommu_enable_paging(iommu); + if (ret) + return ret; + + spin_lock_irqsave(&rk_domain->iommus_lock, flags); + list_add_tail(&iommu->node, &rk_domain->iommus); + spin_unlock_irqrestore(&rk_domain->iommus_lock, flags); + + dev_info(dev, "Attached to iommu domain\n"); + + rk_iommu_disable_stall(iommu); + + return 0; +} + +static void rk_iommu_detach_device(struct iommu_domain *domain, + struct device *dev) +{ + struct rk_iommu *iommu; + struct rk_iommu_domain *rk_domain = to_rk_domain(domain); + unsigned long flags; + + /* Allow 'virtual devices' (eg drm) to detach from domain */ + iommu = rk_iommu_from_dev(dev); + if (!iommu) + return; + + spin_lock_irqsave(&rk_domain->iommus_lock, flags); + list_del_init(&iommu->node); + spin_unlock_irqrestore(&rk_domain->iommus_lock, flags); + + /* Ignore error while disabling, just keep going */ + rk_iommu_enable_stall(iommu); + rk_iommu_disable_paging(iommu); + rk_iommu_write(iommu, RK_MMU_INT_MASK, 0); + rk_iommu_write(iommu, RK_MMU_DTE_ADDR, 0); + rk_iommu_disable_stall(iommu); + + devm_free_irq(dev, iommu->irq, iommu); + + iommu->domain = NULL; + + dev_info(dev, "Detached from iommu domain\n"); +} + +static struct iommu_domain *rk_iommu_domain_alloc(unsigned type) +{ + struct rk_iommu_domain *rk_domain; + + if (type != IOMMU_DOMAIN_UNMANAGED) + return NULL; + + rk_domain = kzalloc(sizeof(*rk_domain), GFP_KERNEL); + if (!rk_domain) + return NULL; + + /* + * rk32xx iommus use a 2 level pagetable. + * Each level1 (dt) and level2 (pt) table has 1024 4-byte entries. + * Allocate one 4 KiB page for each table. + */ + rk_domain->dt = (u32 *)get_zeroed_page(GFP_KERNEL | GFP_DMA32); + if (!rk_domain->dt) + goto err_dt; + + rk_table_flush(rk_domain->dt, NUM_DT_ENTRIES); + + spin_lock_init(&rk_domain->iommus_lock); + spin_lock_init(&rk_domain->dt_lock); + INIT_LIST_HEAD(&rk_domain->iommus); + + return &rk_domain->domain; + +err_dt: + kfree(rk_domain); + return NULL; +} + +static void rk_iommu_domain_free(struct iommu_domain *domain) +{ + struct rk_iommu_domain *rk_domain = to_rk_domain(domain); + int i; + + WARN_ON(!list_empty(&rk_domain->iommus)); + + for (i = 0; i < NUM_DT_ENTRIES; i++) { + u32 dte = rk_domain->dt[i]; + if (rk_dte_is_pt_valid(dte)) { + phys_addr_t pt_phys = rk_dte_pt_address(dte); + u32 *page_table = phys_to_virt(pt_phys); + free_page((unsigned long)page_table); + } + } + + free_page((unsigned long)rk_domain->dt); + kfree(rk_domain); +} + +static bool rk_iommu_is_dev_iommu_master(struct device *dev) +{ + struct device_node *np = dev->of_node; + int ret; + + /* + * An iommu master has an iommus property containing a list of phandles + * to iommu nodes, each with an #iommu-cells property with value 0. + */ + ret = of_count_phandle_with_args(np, "iommus", "#iommu-cells"); + return (ret > 0); +} + +static int rk_iommu_group_set_iommudata(struct iommu_group *group, + struct device *dev) +{ + struct device_node *np = dev->of_node; + struct platform_device *pd; + int ret; + struct of_phandle_args args; + + /* + * An iommu master has an iommus property containing a list of phandles + * to iommu nodes, each with an #iommu-cells property with value 0. + */ + ret = of_parse_phandle_with_args(np, "iommus", "#iommu-cells", 0, + &args); + if (ret) { + dev_err(dev, "of_parse_phandle_with_args(%s) => %d\n", + np->full_name, ret); + return ret; + } + if (args.args_count != 0) { + dev_err(dev, "incorrect number of iommu params found for %s (found %d, expected 0)\n", + args.np->full_name, args.args_count); + return -EINVAL; + } + + pd = of_find_device_by_node(args.np); + of_node_put(args.np); + if (!pd) { + dev_err(dev, "iommu %s not found\n", args.np->full_name); + return -EPROBE_DEFER; + } + + /* TODO(djkurtz): handle multiple slave iommus for a single master */ + iommu_group_set_iommudata(group, &pd->dev, NULL); + + return 0; +} + +static int rk_iommu_add_device(struct device *dev) +{ + struct iommu_group *group; + int ret; + + if (!rk_iommu_is_dev_iommu_master(dev)) + return -ENODEV; + + group = iommu_group_get(dev); + if (!group) { + group = iommu_group_alloc(); + if (IS_ERR(group)) { + dev_err(dev, "Failed to allocate IOMMU group\n"); + return PTR_ERR(group); + } + } + + ret = iommu_group_add_device(group, dev); + if (ret) + goto err_put_group; + + ret = rk_iommu_group_set_iommudata(group, dev); + if (ret) + goto err_remove_device; + + iommu_group_put(group); + + return 0; + +err_remove_device: + iommu_group_remove_device(dev); +err_put_group: + iommu_group_put(group); + return ret; +} + +static void rk_iommu_remove_device(struct device *dev) +{ + if (!rk_iommu_is_dev_iommu_master(dev)) + return; + + iommu_group_remove_device(dev); +} + +static const struct iommu_ops rk_iommu_ops = { + .domain_alloc = rk_iommu_domain_alloc, + .domain_free = rk_iommu_domain_free, + .attach_dev = rk_iommu_attach_device, + .detach_dev = rk_iommu_detach_device, + .map = rk_iommu_map, + .unmap = rk_iommu_unmap, + .add_device = rk_iommu_add_device, + .remove_device = rk_iommu_remove_device, + .iova_to_phys = rk_iommu_iova_to_phys, + .pgsize_bitmap = RK_IOMMU_PGSIZE_BITMAP, +}; + +static int rk_iommu_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct rk_iommu *iommu; + struct resource *res; + + iommu = devm_kzalloc(dev, sizeof(*iommu), GFP_KERNEL); + if (!iommu) + return -ENOMEM; + + platform_set_drvdata(pdev, iommu); + iommu->dev = dev; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + iommu->base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(iommu->base)) + return PTR_ERR(iommu->base); + + iommu->irq = platform_get_irq(pdev, 0); + if (iommu->irq < 0) { + dev_err(dev, "Failed to get IRQ, %d\n", iommu->irq); + return -ENXIO; + } + + return 0; +} + +static int rk_iommu_remove(struct platform_device *pdev) +{ + return 0; +} + +static const struct of_device_id rk_iommu_dt_ids[] = { + { .compatible = "rockchip,iommu" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, rk_iommu_dt_ids); + +static struct platform_driver rk_iommu_driver = { + .probe = rk_iommu_probe, + .remove = rk_iommu_remove, + .driver = { + .name = "rk_iommu", + .of_match_table = rk_iommu_dt_ids, + }, +}; + +static int __init rk_iommu_init(void) +{ + struct device_node *np; + int ret; + + np = of_find_matching_node(NULL, rk_iommu_dt_ids); + if (!np) + return 0; + + of_node_put(np); + + ret = bus_set_iommu(&platform_bus_type, &rk_iommu_ops); + if (ret) + return ret; + + return platform_driver_register(&rk_iommu_driver); +} +static void __exit rk_iommu_exit(void) +{ + platform_driver_unregister(&rk_iommu_driver); +} + +subsys_initcall(rk_iommu_init); +module_exit(rk_iommu_exit); + +MODULE_DESCRIPTION("IOMMU API for Rockchip"); +MODULE_AUTHOR("Simon Xue <xxm@rock-chips.com> and Daniel Kurtz <djkurtz@chromium.org>"); +MODULE_ALIAS("platform:rockchip-iommu"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/iommu/shmobile-iommu.c b/drivers/iommu/shmobile-iommu.c new file mode 100644 index 000000000..a0287519a --- /dev/null +++ b/drivers/iommu/shmobile-iommu.c @@ -0,0 +1,402 @@ +/* + * IOMMU for IPMMU/IPMMUI + * Copyright (C) 2012 Hideki EIRAKU + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + */ + +#include <linux/dma-mapping.h> +#include <linux/io.h> +#include <linux/iommu.h> +#include <linux/platform_device.h> +#include <linux/sizes.h> +#include <linux/slab.h> +#include <asm/dma-iommu.h> +#include "shmobile-ipmmu.h" + +#define L1_SIZE CONFIG_SHMOBILE_IOMMU_L1SIZE +#define L1_LEN (L1_SIZE / 4) +#define L1_ALIGN L1_SIZE +#define L2_SIZE SZ_1K +#define L2_LEN (L2_SIZE / 4) +#define L2_ALIGN L2_SIZE + +struct shmobile_iommu_domain_pgtable { + uint32_t *pgtable; + dma_addr_t handle; +}; + +struct shmobile_iommu_archdata { + struct list_head attached_list; + struct dma_iommu_mapping *iommu_mapping; + spinlock_t attach_lock; + struct shmobile_iommu_domain *attached; + int num_attached_devices; + struct shmobile_ipmmu *ipmmu; +}; + +struct shmobile_iommu_domain { + struct shmobile_iommu_domain_pgtable l1, l2[L1_LEN]; + spinlock_t map_lock; + spinlock_t attached_list_lock; + struct list_head attached_list; + struct iommu_domain domain; +}; + +static struct shmobile_iommu_archdata *ipmmu_archdata; +static struct kmem_cache *l1cache, *l2cache; + +static struct shmobile_iommu_domain *to_sh_domain(struct iommu_domain *dom) +{ + return container_of(dom, struct shmobile_iommu_domain, domain); +} + +static int pgtable_alloc(struct shmobile_iommu_domain_pgtable *pgtable, + struct kmem_cache *cache, size_t size) +{ + pgtable->pgtable = kmem_cache_zalloc(cache, GFP_ATOMIC); + if (!pgtable->pgtable) + return -ENOMEM; + pgtable->handle = dma_map_single(NULL, pgtable->pgtable, size, + DMA_TO_DEVICE); + return 0; +} + +static void pgtable_free(struct shmobile_iommu_domain_pgtable *pgtable, + struct kmem_cache *cache, size_t size) +{ + dma_unmap_single(NULL, pgtable->handle, size, DMA_TO_DEVICE); + kmem_cache_free(cache, pgtable->pgtable); +} + +static uint32_t pgtable_read(struct shmobile_iommu_domain_pgtable *pgtable, + unsigned int index) +{ + return pgtable->pgtable[index]; +} + +static void pgtable_write(struct shmobile_iommu_domain_pgtable *pgtable, + unsigned int index, unsigned int count, uint32_t val) +{ + unsigned int i; + + for (i = 0; i < count; i++) + pgtable->pgtable[index + i] = val; + dma_sync_single_for_device(NULL, pgtable->handle + index * sizeof(val), + sizeof(val) * count, DMA_TO_DEVICE); +} + +static struct iommu_domain *shmobile_iommu_domain_alloc(unsigned type) +{ + struct shmobile_iommu_domain *sh_domain; + int i, ret; + + if (type != IOMMU_DOMAIN_UNMANAGED) + return NULL; + + sh_domain = kzalloc(sizeof(*sh_domain), GFP_KERNEL); + if (!sh_domain) + return NULL; + ret = pgtable_alloc(&sh_domain->l1, l1cache, L1_SIZE); + if (ret < 0) { + kfree(sh_domain); + return NULL; + } + for (i = 0; i < L1_LEN; i++) + sh_domain->l2[i].pgtable = NULL; + spin_lock_init(&sh_domain->map_lock); + spin_lock_init(&sh_domain->attached_list_lock); + INIT_LIST_HEAD(&sh_domain->attached_list); + return &sh_domain->domain; +} + +static void shmobile_iommu_domain_free(struct iommu_domain *domain) +{ + struct shmobile_iommu_domain *sh_domain = to_sh_domain(domain); + int i; + + for (i = 0; i < L1_LEN; i++) { + if (sh_domain->l2[i].pgtable) + pgtable_free(&sh_domain->l2[i], l2cache, L2_SIZE); + } + pgtable_free(&sh_domain->l1, l1cache, L1_SIZE); + kfree(sh_domain); +} + +static int shmobile_iommu_attach_device(struct iommu_domain *domain, + struct device *dev) +{ + struct shmobile_iommu_archdata *archdata = dev->archdata.iommu; + struct shmobile_iommu_domain *sh_domain = to_sh_domain(domain); + int ret = -EBUSY; + + if (!archdata) + return -ENODEV; + spin_lock(&sh_domain->attached_list_lock); + spin_lock(&archdata->attach_lock); + if (archdata->attached != sh_domain) { + if (archdata->attached) + goto err; + ipmmu_tlb_set(archdata->ipmmu, sh_domain->l1.handle, L1_SIZE, + 0); + ipmmu_tlb_flush(archdata->ipmmu); + archdata->attached = sh_domain; + archdata->num_attached_devices = 0; + list_add(&archdata->attached_list, &sh_domain->attached_list); + } + archdata->num_attached_devices++; + ret = 0; +err: + spin_unlock(&archdata->attach_lock); + spin_unlock(&sh_domain->attached_list_lock); + return ret; +} + +static void shmobile_iommu_detach_device(struct iommu_domain *domain, + struct device *dev) +{ + struct shmobile_iommu_archdata *archdata = dev->archdata.iommu; + struct shmobile_iommu_domain *sh_domain = to_sh_domain(domain); + + if (!archdata) + return; + spin_lock(&sh_domain->attached_list_lock); + spin_lock(&archdata->attach_lock); + archdata->num_attached_devices--; + if (!archdata->num_attached_devices) { + ipmmu_tlb_set(archdata->ipmmu, 0, 0, 0); + ipmmu_tlb_flush(archdata->ipmmu); + archdata->attached = NULL; + list_del(&archdata->attached_list); + } + spin_unlock(&archdata->attach_lock); + spin_unlock(&sh_domain->attached_list_lock); +} + +static void domain_tlb_flush(struct shmobile_iommu_domain *sh_domain) +{ + struct shmobile_iommu_archdata *archdata; + + spin_lock(&sh_domain->attached_list_lock); + list_for_each_entry(archdata, &sh_domain->attached_list, attached_list) + ipmmu_tlb_flush(archdata->ipmmu); + spin_unlock(&sh_domain->attached_list_lock); +} + +static int l2alloc(struct shmobile_iommu_domain *sh_domain, + unsigned int l1index) +{ + int ret; + + if (!sh_domain->l2[l1index].pgtable) { + ret = pgtable_alloc(&sh_domain->l2[l1index], l2cache, L2_SIZE); + if (ret < 0) + return ret; + } + pgtable_write(&sh_domain->l1, l1index, 1, + sh_domain->l2[l1index].handle | 0x1); + return 0; +} + +static void l2realfree(struct shmobile_iommu_domain_pgtable *l2) +{ + if (l2->pgtable) + pgtable_free(l2, l2cache, L2_SIZE); +} + +static void l2free(struct shmobile_iommu_domain *sh_domain, + unsigned int l1index, + struct shmobile_iommu_domain_pgtable *l2) +{ + pgtable_write(&sh_domain->l1, l1index, 1, 0); + if (sh_domain->l2[l1index].pgtable) { + *l2 = sh_domain->l2[l1index]; + sh_domain->l2[l1index].pgtable = NULL; + } +} + +static int shmobile_iommu_map(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot) +{ + struct shmobile_iommu_domain_pgtable l2 = { .pgtable = NULL }; + struct shmobile_iommu_domain *sh_domain = to_sh_domain(domain); + unsigned int l1index, l2index; + int ret; + + l1index = iova >> 20; + switch (size) { + case SZ_4K: + l2index = (iova >> 12) & 0xff; + spin_lock(&sh_domain->map_lock); + ret = l2alloc(sh_domain, l1index); + if (!ret) + pgtable_write(&sh_domain->l2[l1index], l2index, 1, + paddr | 0xff2); + spin_unlock(&sh_domain->map_lock); + break; + case SZ_64K: + l2index = (iova >> 12) & 0xf0; + spin_lock(&sh_domain->map_lock); + ret = l2alloc(sh_domain, l1index); + if (!ret) + pgtable_write(&sh_domain->l2[l1index], l2index, 0x10, + paddr | 0xff1); + spin_unlock(&sh_domain->map_lock); + break; + case SZ_1M: + spin_lock(&sh_domain->map_lock); + l2free(sh_domain, l1index, &l2); + pgtable_write(&sh_domain->l1, l1index, 1, paddr | 0xc02); + spin_unlock(&sh_domain->map_lock); + ret = 0; + break; + default: + ret = -EINVAL; + } + if (!ret) + domain_tlb_flush(sh_domain); + l2realfree(&l2); + return ret; +} + +static size_t shmobile_iommu_unmap(struct iommu_domain *domain, + unsigned long iova, size_t size) +{ + struct shmobile_iommu_domain_pgtable l2 = { .pgtable = NULL }; + struct shmobile_iommu_domain *sh_domain = to_sh_domain(domain); + unsigned int l1index, l2index; + uint32_t l2entry = 0; + size_t ret = 0; + + l1index = iova >> 20; + if (!(iova & 0xfffff) && size >= SZ_1M) { + spin_lock(&sh_domain->map_lock); + l2free(sh_domain, l1index, &l2); + spin_unlock(&sh_domain->map_lock); + ret = SZ_1M; + goto done; + } + l2index = (iova >> 12) & 0xff; + spin_lock(&sh_domain->map_lock); + if (sh_domain->l2[l1index].pgtable) + l2entry = pgtable_read(&sh_domain->l2[l1index], l2index); + switch (l2entry & 3) { + case 1: + if (l2index & 0xf) + break; + pgtable_write(&sh_domain->l2[l1index], l2index, 0x10, 0); + ret = SZ_64K; + break; + case 2: + pgtable_write(&sh_domain->l2[l1index], l2index, 1, 0); + ret = SZ_4K; + break; + } + spin_unlock(&sh_domain->map_lock); +done: + if (ret) + domain_tlb_flush(sh_domain); + l2realfree(&l2); + return ret; +} + +static phys_addr_t shmobile_iommu_iova_to_phys(struct iommu_domain *domain, + dma_addr_t iova) +{ + struct shmobile_iommu_domain *sh_domain = to_sh_domain(domain); + uint32_t l1entry = 0, l2entry = 0; + unsigned int l1index, l2index; + + l1index = iova >> 20; + l2index = (iova >> 12) & 0xff; + spin_lock(&sh_domain->map_lock); + if (sh_domain->l2[l1index].pgtable) + l2entry = pgtable_read(&sh_domain->l2[l1index], l2index); + else + l1entry = pgtable_read(&sh_domain->l1, l1index); + spin_unlock(&sh_domain->map_lock); + switch (l2entry & 3) { + case 1: + return (l2entry & ~0xffff) | (iova & 0xffff); + case 2: + return (l2entry & ~0xfff) | (iova & 0xfff); + default: + if ((l1entry & 3) == 2) + return (l1entry & ~0xfffff) | (iova & 0xfffff); + return 0; + } +} + +static int find_dev_name(struct shmobile_ipmmu *ipmmu, const char *dev_name) +{ + unsigned int i, n = ipmmu->num_dev_names; + + for (i = 0; i < n; i++) { + if (strcmp(ipmmu->dev_names[i], dev_name) == 0) + return 1; + } + return 0; +} + +static int shmobile_iommu_add_device(struct device *dev) +{ + struct shmobile_iommu_archdata *archdata = ipmmu_archdata; + struct dma_iommu_mapping *mapping; + + if (!find_dev_name(archdata->ipmmu, dev_name(dev))) + return 0; + mapping = archdata->iommu_mapping; + if (!mapping) { + mapping = arm_iommu_create_mapping(&platform_bus_type, 0, + L1_LEN << 20); + if (IS_ERR(mapping)) + return PTR_ERR(mapping); + archdata->iommu_mapping = mapping; + } + dev->archdata.iommu = archdata; + if (arm_iommu_attach_device(dev, mapping)) + pr_err("arm_iommu_attach_device failed\n"); + return 0; +} + +static const struct iommu_ops shmobile_iommu_ops = { + .domain_alloc = shmobile_iommu_domain_alloc, + .domain_free = shmobile_iommu_domain_free, + .attach_dev = shmobile_iommu_attach_device, + .detach_dev = shmobile_iommu_detach_device, + .map = shmobile_iommu_map, + .unmap = shmobile_iommu_unmap, + .map_sg = default_iommu_map_sg, + .iova_to_phys = shmobile_iommu_iova_to_phys, + .add_device = shmobile_iommu_add_device, + .pgsize_bitmap = SZ_1M | SZ_64K | SZ_4K, +}; + +int ipmmu_iommu_init(struct shmobile_ipmmu *ipmmu) +{ + static struct shmobile_iommu_archdata *archdata; + + l1cache = kmem_cache_create("shmobile-iommu-pgtable1", L1_SIZE, + L1_ALIGN, SLAB_HWCACHE_ALIGN, NULL); + if (!l1cache) + return -ENOMEM; + l2cache = kmem_cache_create("shmobile-iommu-pgtable2", L2_SIZE, + L2_ALIGN, SLAB_HWCACHE_ALIGN, NULL); + if (!l2cache) { + kmem_cache_destroy(l1cache); + return -ENOMEM; + } + archdata = kzalloc(sizeof(*archdata), GFP_KERNEL); + if (!archdata) { + kmem_cache_destroy(l1cache); + kmem_cache_destroy(l2cache); + return -ENOMEM; + } + spin_lock_init(&archdata->attach_lock); + archdata->ipmmu = ipmmu; + ipmmu_archdata = archdata; + bus_set_iommu(&platform_bus_type, &shmobile_iommu_ops); + return 0; +} diff --git a/drivers/iommu/shmobile-ipmmu.c b/drivers/iommu/shmobile-ipmmu.c new file mode 100644 index 000000000..951651a97 --- /dev/null +++ b/drivers/iommu/shmobile-ipmmu.c @@ -0,0 +1,129 @@ +/* + * IPMMU/IPMMUI + * Copyright (C) 2012 Hideki EIRAKU + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + */ + +#include <linux/err.h> +#include <linux/export.h> +#include <linux/io.h> +#include <linux/platform_device.h> +#include <linux/slab.h> +#include <linux/platform_data/sh_ipmmu.h> +#include "shmobile-ipmmu.h" + +#define IMCTR1 0x000 +#define IMCTR2 0x004 +#define IMASID 0x010 +#define IMTTBR 0x014 +#define IMTTBCR 0x018 + +#define IMCTR1_TLBEN (1 << 0) +#define IMCTR1_FLUSH (1 << 1) + +static void ipmmu_reg_write(struct shmobile_ipmmu *ipmmu, unsigned long reg_off, + unsigned long data) +{ + iowrite32(data, ipmmu->ipmmu_base + reg_off); +} + +void ipmmu_tlb_flush(struct shmobile_ipmmu *ipmmu) +{ + if (!ipmmu) + return; + + spin_lock(&ipmmu->flush_lock); + if (ipmmu->tlb_enabled) + ipmmu_reg_write(ipmmu, IMCTR1, IMCTR1_FLUSH | IMCTR1_TLBEN); + else + ipmmu_reg_write(ipmmu, IMCTR1, IMCTR1_FLUSH); + spin_unlock(&ipmmu->flush_lock); +} + +void ipmmu_tlb_set(struct shmobile_ipmmu *ipmmu, unsigned long phys, int size, + int asid) +{ + if (!ipmmu) + return; + + spin_lock(&ipmmu->flush_lock); + switch (size) { + default: + ipmmu->tlb_enabled = 0; + break; + case 0x2000: + ipmmu_reg_write(ipmmu, IMTTBCR, 1); + ipmmu->tlb_enabled = 1; + break; + case 0x1000: + ipmmu_reg_write(ipmmu, IMTTBCR, 2); + ipmmu->tlb_enabled = 1; + break; + case 0x800: + ipmmu_reg_write(ipmmu, IMTTBCR, 3); + ipmmu->tlb_enabled = 1; + break; + case 0x400: + ipmmu_reg_write(ipmmu, IMTTBCR, 4); + ipmmu->tlb_enabled = 1; + break; + case 0x200: + ipmmu_reg_write(ipmmu, IMTTBCR, 5); + ipmmu->tlb_enabled = 1; + break; + case 0x100: + ipmmu_reg_write(ipmmu, IMTTBCR, 6); + ipmmu->tlb_enabled = 1; + break; + case 0x80: + ipmmu_reg_write(ipmmu, IMTTBCR, 7); + ipmmu->tlb_enabled = 1; + break; + } + ipmmu_reg_write(ipmmu, IMTTBR, phys); + ipmmu_reg_write(ipmmu, IMASID, asid); + spin_unlock(&ipmmu->flush_lock); +} + +static int ipmmu_probe(struct platform_device *pdev) +{ + struct shmobile_ipmmu *ipmmu; + struct resource *res; + struct shmobile_ipmmu_platform_data *pdata = pdev->dev.platform_data; + + ipmmu = devm_kzalloc(&pdev->dev, sizeof(*ipmmu), GFP_KERNEL); + if (!ipmmu) { + dev_err(&pdev->dev, "cannot allocate device data\n"); + return -ENOMEM; + } + spin_lock_init(&ipmmu->flush_lock); + ipmmu->dev = &pdev->dev; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + ipmmu->ipmmu_base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(ipmmu->ipmmu_base)) + return PTR_ERR(ipmmu->ipmmu_base); + + ipmmu->dev_names = pdata->dev_names; + ipmmu->num_dev_names = pdata->num_dev_names; + platform_set_drvdata(pdev, ipmmu); + ipmmu_reg_write(ipmmu, IMCTR1, 0x0); /* disable TLB */ + ipmmu_reg_write(ipmmu, IMCTR2, 0x0); /* disable PMB */ + return ipmmu_iommu_init(ipmmu); +} + +static struct platform_driver ipmmu_driver = { + .probe = ipmmu_probe, + .driver = { + .name = "ipmmu", + }, +}; + +static int __init ipmmu_init(void) +{ + return platform_driver_register(&ipmmu_driver); +} +subsys_initcall(ipmmu_init); diff --git a/drivers/iommu/shmobile-ipmmu.h b/drivers/iommu/shmobile-ipmmu.h new file mode 100644 index 000000000..9524743ca --- /dev/null +++ b/drivers/iommu/shmobile-ipmmu.h @@ -0,0 +1,34 @@ +/* shmobile-ipmmu.h + * + * Copyright (C) 2012 Hideki EIRAKU + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + */ + +#ifndef __SHMOBILE_IPMMU_H__ +#define __SHMOBILE_IPMMU_H__ + +struct shmobile_ipmmu { + struct device *dev; + void __iomem *ipmmu_base; + int tlb_enabled; + spinlock_t flush_lock; + const char * const *dev_names; + unsigned int num_dev_names; +}; + +#ifdef CONFIG_SHMOBILE_IPMMU_TLB +void ipmmu_tlb_flush(struct shmobile_ipmmu *ipmmu); +void ipmmu_tlb_set(struct shmobile_ipmmu *ipmmu, unsigned long phys, int size, + int asid); +int ipmmu_iommu_init(struct shmobile_ipmmu *ipmmu); +#else +static inline int ipmmu_iommu_init(struct shmobile_ipmmu *ipmmu) +{ + return -EINVAL; +} +#endif + +#endif /* __SHMOBILE_IPMMU_H__ */ diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c new file mode 100644 index 000000000..37e708fdb --- /dev/null +++ b/drivers/iommu/tegra-gart.c @@ -0,0 +1,477 @@ +/* + * IOMMU API for GART in Tegra20 + * + * Copyright (c) 2010-2012, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#define pr_fmt(fmt) "%s(): " fmt, __func__ + +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/spinlock.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <linux/mm.h> +#include <linux/list.h> +#include <linux/device.h> +#include <linux/io.h> +#include <linux/iommu.h> +#include <linux/of.h> + +#include <asm/cacheflush.h> + +/* bitmap of the page sizes currently supported */ +#define GART_IOMMU_PGSIZES (SZ_4K) + +#define GART_REG_BASE 0x24 +#define GART_CONFIG (0x24 - GART_REG_BASE) +#define GART_ENTRY_ADDR (0x28 - GART_REG_BASE) +#define GART_ENTRY_DATA (0x2c - GART_REG_BASE) +#define GART_ENTRY_PHYS_ADDR_VALID (1 << 31) + +#define GART_PAGE_SHIFT 12 +#define GART_PAGE_SIZE (1 << GART_PAGE_SHIFT) +#define GART_PAGE_MASK \ + (~(GART_PAGE_SIZE - 1) & ~GART_ENTRY_PHYS_ADDR_VALID) + +struct gart_client { + struct device *dev; + struct list_head list; +}; + +struct gart_device { + void __iomem *regs; + u32 *savedata; + u32 page_count; /* total remappable size */ + dma_addr_t iovmm_base; /* offset to vmm_area */ + spinlock_t pte_lock; /* for pagetable */ + struct list_head client; + spinlock_t client_lock; /* for client list */ + struct device *dev; +}; + +struct gart_domain { + struct iommu_domain domain; /* generic domain handle */ + struct gart_device *gart; /* link to gart device */ +}; + +static struct gart_device *gart_handle; /* unique for a system */ + +#define GART_PTE(_pfn) \ + (GART_ENTRY_PHYS_ADDR_VALID | ((_pfn) << PAGE_SHIFT)) + +static struct gart_domain *to_gart_domain(struct iommu_domain *dom) +{ + return container_of(dom, struct gart_domain, domain); +} + +/* + * Any interaction between any block on PPSB and a block on APB or AHB + * must have these read-back to ensure the APB/AHB bus transaction is + * complete before initiating activity on the PPSB block. + */ +#define FLUSH_GART_REGS(gart) ((void)readl((gart)->regs + GART_CONFIG)) + +#define for_each_gart_pte(gart, iova) \ + for (iova = gart->iovmm_base; \ + iova < gart->iovmm_base + GART_PAGE_SIZE * gart->page_count; \ + iova += GART_PAGE_SIZE) + +static inline void gart_set_pte(struct gart_device *gart, + unsigned long offs, u32 pte) +{ + writel(offs, gart->regs + GART_ENTRY_ADDR); + writel(pte, gart->regs + GART_ENTRY_DATA); + + dev_dbg(gart->dev, "%s %08lx:%08x\n", + pte ? "map" : "unmap", offs, pte & GART_PAGE_MASK); +} + +static inline unsigned long gart_read_pte(struct gart_device *gart, + unsigned long offs) +{ + unsigned long pte; + + writel(offs, gart->regs + GART_ENTRY_ADDR); + pte = readl(gart->regs + GART_ENTRY_DATA); + + return pte; +} + +static void do_gart_setup(struct gart_device *gart, const u32 *data) +{ + unsigned long iova; + + for_each_gart_pte(gart, iova) + gart_set_pte(gart, iova, data ? *(data++) : 0); + + writel(1, gart->regs + GART_CONFIG); + FLUSH_GART_REGS(gart); +} + +#ifdef DEBUG +static void gart_dump_table(struct gart_device *gart) +{ + unsigned long iova; + unsigned long flags; + + spin_lock_irqsave(&gart->pte_lock, flags); + for_each_gart_pte(gart, iova) { + unsigned long pte; + + pte = gart_read_pte(gart, iova); + + dev_dbg(gart->dev, "%s %08lx:%08lx\n", + (GART_ENTRY_PHYS_ADDR_VALID & pte) ? "v" : " ", + iova, pte & GART_PAGE_MASK); + } + spin_unlock_irqrestore(&gart->pte_lock, flags); +} +#else +static inline void gart_dump_table(struct gart_device *gart) +{ +} +#endif + +static inline bool gart_iova_range_valid(struct gart_device *gart, + unsigned long iova, size_t bytes) +{ + unsigned long iova_start, iova_end, gart_start, gart_end; + + iova_start = iova; + iova_end = iova_start + bytes - 1; + gart_start = gart->iovmm_base; + gart_end = gart_start + gart->page_count * GART_PAGE_SIZE - 1; + + if (iova_start < gart_start) + return false; + if (iova_end > gart_end) + return false; + return true; +} + +static int gart_iommu_attach_dev(struct iommu_domain *domain, + struct device *dev) +{ + struct gart_domain *gart_domain = to_gart_domain(domain); + struct gart_device *gart = gart_domain->gart; + struct gart_client *client, *c; + int err = 0; + + client = devm_kzalloc(gart->dev, sizeof(*c), GFP_KERNEL); + if (!client) + return -ENOMEM; + client->dev = dev; + + spin_lock(&gart->client_lock); + list_for_each_entry(c, &gart->client, list) { + if (c->dev == dev) { + dev_err(gart->dev, + "%s is already attached\n", dev_name(dev)); + err = -EINVAL; + goto fail; + } + } + list_add(&client->list, &gart->client); + spin_unlock(&gart->client_lock); + dev_dbg(gart->dev, "Attached %s\n", dev_name(dev)); + return 0; + +fail: + devm_kfree(gart->dev, client); + spin_unlock(&gart->client_lock); + return err; +} + +static void gart_iommu_detach_dev(struct iommu_domain *domain, + struct device *dev) +{ + struct gart_domain *gart_domain = to_gart_domain(domain); + struct gart_device *gart = gart_domain->gart; + struct gart_client *c; + + spin_lock(&gart->client_lock); + + list_for_each_entry(c, &gart->client, list) { + if (c->dev == dev) { + list_del(&c->list); + devm_kfree(gart->dev, c); + dev_dbg(gart->dev, "Detached %s\n", dev_name(dev)); + goto out; + } + } + dev_err(gart->dev, "Couldn't find\n"); +out: + spin_unlock(&gart->client_lock); +} + +static struct iommu_domain *gart_iommu_domain_alloc(unsigned type) +{ + struct gart_domain *gart_domain; + struct gart_device *gart; + + if (type != IOMMU_DOMAIN_UNMANAGED) + return NULL; + + gart = gart_handle; + if (!gart) + return NULL; + + gart_domain = kzalloc(sizeof(*gart_domain), GFP_KERNEL); + if (!gart_domain) + return NULL; + + gart_domain->gart = gart; + gart_domain->domain.geometry.aperture_start = gart->iovmm_base; + gart_domain->domain.geometry.aperture_end = gart->iovmm_base + + gart->page_count * GART_PAGE_SIZE - 1; + gart_domain->domain.geometry.force_aperture = true; + + return &gart_domain->domain; +} + +static void gart_iommu_domain_free(struct iommu_domain *domain) +{ + struct gart_domain *gart_domain = to_gart_domain(domain); + struct gart_device *gart = gart_domain->gart; + + if (gart) { + spin_lock(&gart->client_lock); + if (!list_empty(&gart->client)) { + struct gart_client *c; + + list_for_each_entry(c, &gart->client, list) + gart_iommu_detach_dev(domain, c->dev); + } + spin_unlock(&gart->client_lock); + } + + kfree(gart_domain); +} + +static int gart_iommu_map(struct iommu_domain *domain, unsigned long iova, + phys_addr_t pa, size_t bytes, int prot) +{ + struct gart_domain *gart_domain = to_gart_domain(domain); + struct gart_device *gart = gart_domain->gart; + unsigned long flags; + unsigned long pfn; + + if (!gart_iova_range_valid(gart, iova, bytes)) + return -EINVAL; + + spin_lock_irqsave(&gart->pte_lock, flags); + pfn = __phys_to_pfn(pa); + if (!pfn_valid(pfn)) { + dev_err(gart->dev, "Invalid page: %pa\n", &pa); + spin_unlock_irqrestore(&gart->pte_lock, flags); + return -EINVAL; + } + gart_set_pte(gart, iova, GART_PTE(pfn)); + FLUSH_GART_REGS(gart); + spin_unlock_irqrestore(&gart->pte_lock, flags); + return 0; +} + +static size_t gart_iommu_unmap(struct iommu_domain *domain, unsigned long iova, + size_t bytes) +{ + struct gart_domain *gart_domain = to_gart_domain(domain); + struct gart_device *gart = gart_domain->gart; + unsigned long flags; + + if (!gart_iova_range_valid(gart, iova, bytes)) + return 0; + + spin_lock_irqsave(&gart->pte_lock, flags); + gart_set_pte(gart, iova, 0); + FLUSH_GART_REGS(gart); + spin_unlock_irqrestore(&gart->pte_lock, flags); + return 0; +} + +static phys_addr_t gart_iommu_iova_to_phys(struct iommu_domain *domain, + dma_addr_t iova) +{ + struct gart_domain *gart_domain = to_gart_domain(domain); + struct gart_device *gart = gart_domain->gart; + unsigned long pte; + phys_addr_t pa; + unsigned long flags; + + if (!gart_iova_range_valid(gart, iova, 0)) + return -EINVAL; + + spin_lock_irqsave(&gart->pte_lock, flags); + pte = gart_read_pte(gart, iova); + spin_unlock_irqrestore(&gart->pte_lock, flags); + + pa = (pte & GART_PAGE_MASK); + if (!pfn_valid(__phys_to_pfn(pa))) { + dev_err(gart->dev, "No entry for %08llx:%pa\n", + (unsigned long long)iova, &pa); + gart_dump_table(gart); + return -EINVAL; + } + return pa; +} + +static bool gart_iommu_capable(enum iommu_cap cap) +{ + return false; +} + +static const struct iommu_ops gart_iommu_ops = { + .capable = gart_iommu_capable, + .domain_alloc = gart_iommu_domain_alloc, + .domain_free = gart_iommu_domain_free, + .attach_dev = gart_iommu_attach_dev, + .detach_dev = gart_iommu_detach_dev, + .map = gart_iommu_map, + .map_sg = default_iommu_map_sg, + .unmap = gart_iommu_unmap, + .iova_to_phys = gart_iommu_iova_to_phys, + .pgsize_bitmap = GART_IOMMU_PGSIZES, +}; + +static int tegra_gart_suspend(struct device *dev) +{ + struct gart_device *gart = dev_get_drvdata(dev); + unsigned long iova; + u32 *data = gart->savedata; + unsigned long flags; + + spin_lock_irqsave(&gart->pte_lock, flags); + for_each_gart_pte(gart, iova) + *(data++) = gart_read_pte(gart, iova); + spin_unlock_irqrestore(&gart->pte_lock, flags); + return 0; +} + +static int tegra_gart_resume(struct device *dev) +{ + struct gart_device *gart = dev_get_drvdata(dev); + unsigned long flags; + + spin_lock_irqsave(&gart->pte_lock, flags); + do_gart_setup(gart, gart->savedata); + spin_unlock_irqrestore(&gart->pte_lock, flags); + return 0; +} + +static int tegra_gart_probe(struct platform_device *pdev) +{ + struct gart_device *gart; + struct resource *res, *res_remap; + void __iomem *gart_regs; + struct device *dev = &pdev->dev; + + if (gart_handle) + return -EIO; + + BUILD_BUG_ON(PAGE_SHIFT != GART_PAGE_SHIFT); + + /* the GART memory aperture is required */ + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + res_remap = platform_get_resource(pdev, IORESOURCE_MEM, 1); + if (!res || !res_remap) { + dev_err(dev, "GART memory aperture expected\n"); + return -ENXIO; + } + + gart = devm_kzalloc(dev, sizeof(*gart), GFP_KERNEL); + if (!gart) { + dev_err(dev, "failed to allocate gart_device\n"); + return -ENOMEM; + } + + gart_regs = devm_ioremap(dev, res->start, resource_size(res)); + if (!gart_regs) { + dev_err(dev, "failed to remap GART registers\n"); + return -ENXIO; + } + + gart->dev = &pdev->dev; + spin_lock_init(&gart->pte_lock); + spin_lock_init(&gart->client_lock); + INIT_LIST_HEAD(&gart->client); + gart->regs = gart_regs; + gart->iovmm_base = (dma_addr_t)res_remap->start; + gart->page_count = (resource_size(res_remap) >> GART_PAGE_SHIFT); + + gart->savedata = vmalloc(sizeof(u32) * gart->page_count); + if (!gart->savedata) { + dev_err(dev, "failed to allocate context save area\n"); + return -ENOMEM; + } + + platform_set_drvdata(pdev, gart); + do_gart_setup(gart, NULL); + + gart_handle = gart; + + return 0; +} + +static int tegra_gart_remove(struct platform_device *pdev) +{ + struct gart_device *gart = platform_get_drvdata(pdev); + + writel(0, gart->regs + GART_CONFIG); + if (gart->savedata) + vfree(gart->savedata); + gart_handle = NULL; + return 0; +} + +static const struct dev_pm_ops tegra_gart_pm_ops = { + .suspend = tegra_gart_suspend, + .resume = tegra_gart_resume, +}; + +static const struct of_device_id tegra_gart_of_match[] = { + { .compatible = "nvidia,tegra20-gart", }, + { }, +}; +MODULE_DEVICE_TABLE(of, tegra_gart_of_match); + +static struct platform_driver tegra_gart_driver = { + .probe = tegra_gart_probe, + .remove = tegra_gart_remove, + .driver = { + .name = "tegra-gart", + .pm = &tegra_gart_pm_ops, + .of_match_table = tegra_gart_of_match, + }, +}; + +static int tegra_gart_init(void) +{ + return platform_driver_register(&tegra_gart_driver); +} + +static void __exit tegra_gart_exit(void) +{ + platform_driver_unregister(&tegra_gart_driver); +} + +subsys_initcall(tegra_gart_init); +module_exit(tegra_gart_exit); + +MODULE_DESCRIPTION("IOMMU API for GART in Tegra20"); +MODULE_AUTHOR("Hiroshi DOYU <hdoyu@nvidia.com>"); +MODULE_ALIAS("platform:tegra-gart"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c new file mode 100644 index 000000000..c845d99ec --- /dev/null +++ b/drivers/iommu/tegra-smmu.c @@ -0,0 +1,747 @@ +/* + * Copyright (C) 2011-2014 NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/bitops.h> +#include <linux/err.h> +#include <linux/iommu.h> +#include <linux/kernel.h> +#include <linux/of.h> +#include <linux/of_device.h> +#include <linux/platform_device.h> +#include <linux/slab.h> + +#include <soc/tegra/ahb.h> +#include <soc/tegra/mc.h> + +struct tegra_smmu { + void __iomem *regs; + struct device *dev; + + struct tegra_mc *mc; + const struct tegra_smmu_soc *soc; + + unsigned long pfn_mask; + + unsigned long *asids; + struct mutex lock; + + struct list_head list; +}; + +struct tegra_smmu_as { + struct iommu_domain domain; + struct tegra_smmu *smmu; + unsigned int use_count; + struct page *count; + struct page *pd; + unsigned id; + u32 attr; +}; + +static struct tegra_smmu_as *to_smmu_as(struct iommu_domain *dom) +{ + return container_of(dom, struct tegra_smmu_as, domain); +} + +static inline void smmu_writel(struct tegra_smmu *smmu, u32 value, + unsigned long offset) +{ + writel(value, smmu->regs + offset); +} + +static inline u32 smmu_readl(struct tegra_smmu *smmu, unsigned long offset) +{ + return readl(smmu->regs + offset); +} + +#define SMMU_CONFIG 0x010 +#define SMMU_CONFIG_ENABLE (1 << 0) + +#define SMMU_TLB_CONFIG 0x14 +#define SMMU_TLB_CONFIG_HIT_UNDER_MISS (1 << 29) +#define SMMU_TLB_CONFIG_ROUND_ROBIN_ARBITRATION (1 << 28) +#define SMMU_TLB_CONFIG_ACTIVE_LINES(x) ((x) & 0x3f) + +#define SMMU_PTC_CONFIG 0x18 +#define SMMU_PTC_CONFIG_ENABLE (1 << 29) +#define SMMU_PTC_CONFIG_REQ_LIMIT(x) (((x) & 0x0f) << 24) +#define SMMU_PTC_CONFIG_INDEX_MAP(x) ((x) & 0x3f) + +#define SMMU_PTB_ASID 0x01c +#define SMMU_PTB_ASID_VALUE(x) ((x) & 0x7f) + +#define SMMU_PTB_DATA 0x020 +#define SMMU_PTB_DATA_VALUE(page, attr) (page_to_phys(page) >> 12 | (attr)) + +#define SMMU_MK_PDE(page, attr) (page_to_phys(page) >> SMMU_PTE_SHIFT | (attr)) + +#define SMMU_TLB_FLUSH 0x030 +#define SMMU_TLB_FLUSH_VA_MATCH_ALL (0 << 0) +#define SMMU_TLB_FLUSH_VA_MATCH_SECTION (2 << 0) +#define SMMU_TLB_FLUSH_VA_MATCH_GROUP (3 << 0) +#define SMMU_TLB_FLUSH_ASID(x) (((x) & 0x7f) << 24) +#define SMMU_TLB_FLUSH_VA_SECTION(addr) ((((addr) & 0xffc00000) >> 12) | \ + SMMU_TLB_FLUSH_VA_MATCH_SECTION) +#define SMMU_TLB_FLUSH_VA_GROUP(addr) ((((addr) & 0xffffc000) >> 12) | \ + SMMU_TLB_FLUSH_VA_MATCH_GROUP) +#define SMMU_TLB_FLUSH_ASID_MATCH (1 << 31) + +#define SMMU_PTC_FLUSH 0x034 +#define SMMU_PTC_FLUSH_TYPE_ALL (0 << 0) +#define SMMU_PTC_FLUSH_TYPE_ADR (1 << 0) + +#define SMMU_PTC_FLUSH_HI 0x9b8 +#define SMMU_PTC_FLUSH_HI_MASK 0x3 + +/* per-SWGROUP SMMU_*_ASID register */ +#define SMMU_ASID_ENABLE (1 << 31) +#define SMMU_ASID_MASK 0x7f +#define SMMU_ASID_VALUE(x) ((x) & SMMU_ASID_MASK) + +/* page table definitions */ +#define SMMU_NUM_PDE 1024 +#define SMMU_NUM_PTE 1024 + +#define SMMU_SIZE_PD (SMMU_NUM_PDE * 4) +#define SMMU_SIZE_PT (SMMU_NUM_PTE * 4) + +#define SMMU_PDE_SHIFT 22 +#define SMMU_PTE_SHIFT 12 + +#define SMMU_PD_READABLE (1 << 31) +#define SMMU_PD_WRITABLE (1 << 30) +#define SMMU_PD_NONSECURE (1 << 29) + +#define SMMU_PDE_READABLE (1 << 31) +#define SMMU_PDE_WRITABLE (1 << 30) +#define SMMU_PDE_NONSECURE (1 << 29) +#define SMMU_PDE_NEXT (1 << 28) + +#define SMMU_PTE_READABLE (1 << 31) +#define SMMU_PTE_WRITABLE (1 << 30) +#define SMMU_PTE_NONSECURE (1 << 29) + +#define SMMU_PDE_ATTR (SMMU_PDE_READABLE | SMMU_PDE_WRITABLE | \ + SMMU_PDE_NONSECURE) +#define SMMU_PTE_ATTR (SMMU_PTE_READABLE | SMMU_PTE_WRITABLE | \ + SMMU_PTE_NONSECURE) + +static inline void smmu_flush_ptc(struct tegra_smmu *smmu, struct page *page, + unsigned long offset) +{ + phys_addr_t phys = page ? page_to_phys(page) : 0; + u32 value; + + if (page) { + offset &= ~(smmu->mc->soc->atom_size - 1); + + if (smmu->mc->soc->num_address_bits > 32) { +#ifdef CONFIG_PHYS_ADDR_T_64BIT + value = (phys >> 32) & SMMU_PTC_FLUSH_HI_MASK; +#else + value = 0; +#endif + smmu_writel(smmu, value, SMMU_PTC_FLUSH_HI); + } + + value = (phys + offset) | SMMU_PTC_FLUSH_TYPE_ADR; + } else { + value = SMMU_PTC_FLUSH_TYPE_ALL; + } + + smmu_writel(smmu, value, SMMU_PTC_FLUSH); +} + +static inline void smmu_flush_tlb(struct tegra_smmu *smmu) +{ + smmu_writel(smmu, SMMU_TLB_FLUSH_VA_MATCH_ALL, SMMU_TLB_FLUSH); +} + +static inline void smmu_flush_tlb_asid(struct tegra_smmu *smmu, + unsigned long asid) +{ + u32 value; + + value = SMMU_TLB_FLUSH_ASID_MATCH | SMMU_TLB_FLUSH_ASID(asid) | + SMMU_TLB_FLUSH_VA_MATCH_ALL; + smmu_writel(smmu, value, SMMU_TLB_FLUSH); +} + +static inline void smmu_flush_tlb_section(struct tegra_smmu *smmu, + unsigned long asid, + unsigned long iova) +{ + u32 value; + + value = SMMU_TLB_FLUSH_ASID_MATCH | SMMU_TLB_FLUSH_ASID(asid) | + SMMU_TLB_FLUSH_VA_SECTION(iova); + smmu_writel(smmu, value, SMMU_TLB_FLUSH); +} + +static inline void smmu_flush_tlb_group(struct tegra_smmu *smmu, + unsigned long asid, + unsigned long iova) +{ + u32 value; + + value = SMMU_TLB_FLUSH_ASID_MATCH | SMMU_TLB_FLUSH_ASID(asid) | + SMMU_TLB_FLUSH_VA_GROUP(iova); + smmu_writel(smmu, value, SMMU_TLB_FLUSH); +} + +static inline void smmu_flush(struct tegra_smmu *smmu) +{ + smmu_readl(smmu, SMMU_CONFIG); +} + +static int tegra_smmu_alloc_asid(struct tegra_smmu *smmu, unsigned int *idp) +{ + unsigned long id; + + mutex_lock(&smmu->lock); + + id = find_first_zero_bit(smmu->asids, smmu->soc->num_asids); + if (id >= smmu->soc->num_asids) { + mutex_unlock(&smmu->lock); + return -ENOSPC; + } + + set_bit(id, smmu->asids); + *idp = id; + + mutex_unlock(&smmu->lock); + return 0; +} + +static void tegra_smmu_free_asid(struct tegra_smmu *smmu, unsigned int id) +{ + mutex_lock(&smmu->lock); + clear_bit(id, smmu->asids); + mutex_unlock(&smmu->lock); +} + +static bool tegra_smmu_capable(enum iommu_cap cap) +{ + return false; +} + +static struct iommu_domain *tegra_smmu_domain_alloc(unsigned type) +{ + struct tegra_smmu_as *as; + unsigned int i; + uint32_t *pd; + + if (type != IOMMU_DOMAIN_UNMANAGED) + return NULL; + + as = kzalloc(sizeof(*as), GFP_KERNEL); + if (!as) + return NULL; + + as->attr = SMMU_PD_READABLE | SMMU_PD_WRITABLE | SMMU_PD_NONSECURE; + + as->pd = alloc_page(GFP_KERNEL | __GFP_DMA); + if (!as->pd) { + kfree(as); + return NULL; + } + + as->count = alloc_page(GFP_KERNEL); + if (!as->count) { + __free_page(as->pd); + kfree(as); + return NULL; + } + + /* clear PDEs */ + pd = page_address(as->pd); + SetPageReserved(as->pd); + + for (i = 0; i < SMMU_NUM_PDE; i++) + pd[i] = 0; + + /* clear PDE usage counters */ + pd = page_address(as->count); + SetPageReserved(as->count); + + for (i = 0; i < SMMU_NUM_PDE; i++) + pd[i] = 0; + + /* setup aperture */ + as->domain.geometry.aperture_start = 0; + as->domain.geometry.aperture_end = 0xffffffff; + as->domain.geometry.force_aperture = true; + + return &as->domain; +} + +static void tegra_smmu_domain_free(struct iommu_domain *domain) +{ + struct tegra_smmu_as *as = to_smmu_as(domain); + + /* TODO: free page directory and page tables */ + ClearPageReserved(as->pd); + + kfree(as); +} + +static const struct tegra_smmu_swgroup * +tegra_smmu_find_swgroup(struct tegra_smmu *smmu, unsigned int swgroup) +{ + const struct tegra_smmu_swgroup *group = NULL; + unsigned int i; + + for (i = 0; i < smmu->soc->num_swgroups; i++) { + if (smmu->soc->swgroups[i].swgroup == swgroup) { + group = &smmu->soc->swgroups[i]; + break; + } + } + + return group; +} + +static void tegra_smmu_enable(struct tegra_smmu *smmu, unsigned int swgroup, + unsigned int asid) +{ + const struct tegra_smmu_swgroup *group; + unsigned int i; + u32 value; + + for (i = 0; i < smmu->soc->num_clients; i++) { + const struct tegra_mc_client *client = &smmu->soc->clients[i]; + + if (client->swgroup != swgroup) + continue; + + value = smmu_readl(smmu, client->smmu.reg); + value |= BIT(client->smmu.bit); + smmu_writel(smmu, value, client->smmu.reg); + } + + group = tegra_smmu_find_swgroup(smmu, swgroup); + if (group) { + value = smmu_readl(smmu, group->reg); + value &= ~SMMU_ASID_MASK; + value |= SMMU_ASID_VALUE(asid); + value |= SMMU_ASID_ENABLE; + smmu_writel(smmu, value, group->reg); + } +} + +static void tegra_smmu_disable(struct tegra_smmu *smmu, unsigned int swgroup, + unsigned int asid) +{ + const struct tegra_smmu_swgroup *group; + unsigned int i; + u32 value; + + group = tegra_smmu_find_swgroup(smmu, swgroup); + if (group) { + value = smmu_readl(smmu, group->reg); + value &= ~SMMU_ASID_MASK; + value |= SMMU_ASID_VALUE(asid); + value &= ~SMMU_ASID_ENABLE; + smmu_writel(smmu, value, group->reg); + } + + for (i = 0; i < smmu->soc->num_clients; i++) { + const struct tegra_mc_client *client = &smmu->soc->clients[i]; + + if (client->swgroup != swgroup) + continue; + + value = smmu_readl(smmu, client->smmu.reg); + value &= ~BIT(client->smmu.bit); + smmu_writel(smmu, value, client->smmu.reg); + } +} + +static int tegra_smmu_as_prepare(struct tegra_smmu *smmu, + struct tegra_smmu_as *as) +{ + u32 value; + int err; + + if (as->use_count > 0) { + as->use_count++; + return 0; + } + + err = tegra_smmu_alloc_asid(smmu, &as->id); + if (err < 0) + return err; + + smmu->soc->ops->flush_dcache(as->pd, 0, SMMU_SIZE_PD); + smmu_flush_ptc(smmu, as->pd, 0); + smmu_flush_tlb_asid(smmu, as->id); + + smmu_writel(smmu, as->id & 0x7f, SMMU_PTB_ASID); + value = SMMU_PTB_DATA_VALUE(as->pd, as->attr); + smmu_writel(smmu, value, SMMU_PTB_DATA); + smmu_flush(smmu); + + as->smmu = smmu; + as->use_count++; + + return 0; +} + +static void tegra_smmu_as_unprepare(struct tegra_smmu *smmu, + struct tegra_smmu_as *as) +{ + if (--as->use_count > 0) + return; + + tegra_smmu_free_asid(smmu, as->id); + as->smmu = NULL; +} + +static int tegra_smmu_attach_dev(struct iommu_domain *domain, + struct device *dev) +{ + struct tegra_smmu *smmu = dev->archdata.iommu; + struct tegra_smmu_as *as = to_smmu_as(domain); + struct device_node *np = dev->of_node; + struct of_phandle_args args; + unsigned int index = 0; + int err = 0; + + while (!of_parse_phandle_with_args(np, "iommus", "#iommu-cells", index, + &args)) { + unsigned int swgroup = args.args[0]; + + if (args.np != smmu->dev->of_node) { + of_node_put(args.np); + continue; + } + + of_node_put(args.np); + + err = tegra_smmu_as_prepare(smmu, as); + if (err < 0) + return err; + + tegra_smmu_enable(smmu, swgroup, as->id); + index++; + } + + if (index == 0) + return -ENODEV; + + return 0; +} + +static void tegra_smmu_detach_dev(struct iommu_domain *domain, struct device *dev) +{ + struct tegra_smmu_as *as = to_smmu_as(domain); + struct device_node *np = dev->of_node; + struct tegra_smmu *smmu = as->smmu; + struct of_phandle_args args; + unsigned int index = 0; + + while (!of_parse_phandle_with_args(np, "iommus", "#iommu-cells", index, + &args)) { + unsigned int swgroup = args.args[0]; + + if (args.np != smmu->dev->of_node) { + of_node_put(args.np); + continue; + } + + of_node_put(args.np); + + tegra_smmu_disable(smmu, swgroup, as->id); + tegra_smmu_as_unprepare(smmu, as); + index++; + } +} + +static u32 *as_get_pte(struct tegra_smmu_as *as, dma_addr_t iova, + struct page **pagep) +{ + u32 *pd = page_address(as->pd), *pt, *count; + u32 pde = (iova >> SMMU_PDE_SHIFT) & 0x3ff; + u32 pte = (iova >> SMMU_PTE_SHIFT) & 0x3ff; + struct tegra_smmu *smmu = as->smmu; + struct page *page; + unsigned int i; + + if (pd[pde] == 0) { + page = alloc_page(GFP_KERNEL | __GFP_DMA); + if (!page) + return NULL; + + pt = page_address(page); + SetPageReserved(page); + + for (i = 0; i < SMMU_NUM_PTE; i++) + pt[i] = 0; + + smmu->soc->ops->flush_dcache(page, 0, SMMU_SIZE_PT); + + pd[pde] = SMMU_MK_PDE(page, SMMU_PDE_ATTR | SMMU_PDE_NEXT); + + smmu->soc->ops->flush_dcache(as->pd, pde << 2, 4); + smmu_flush_ptc(smmu, as->pd, pde << 2); + smmu_flush_tlb_section(smmu, as->id, iova); + smmu_flush(smmu); + } else { + page = pfn_to_page(pd[pde] & smmu->pfn_mask); + pt = page_address(page); + } + + *pagep = page; + + /* Keep track of entries in this page table. */ + count = page_address(as->count); + if (pt[pte] == 0) + count[pde]++; + + return &pt[pte]; +} + +static void as_put_pte(struct tegra_smmu_as *as, dma_addr_t iova) +{ + u32 pde = (iova >> SMMU_PDE_SHIFT) & 0x3ff; + u32 pte = (iova >> SMMU_PTE_SHIFT) & 0x3ff; + u32 *count = page_address(as->count); + u32 *pd = page_address(as->pd), *pt; + struct page *page; + + page = pfn_to_page(pd[pde] & as->smmu->pfn_mask); + pt = page_address(page); + + /* + * When no entries in this page table are used anymore, return the + * memory page to the system. + */ + if (pt[pte] != 0) { + if (--count[pde] == 0) { + ClearPageReserved(page); + __free_page(page); + pd[pde] = 0; + } + + pt[pte] = 0; + } +} + +static int tegra_smmu_map(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot) +{ + struct tegra_smmu_as *as = to_smmu_as(domain); + struct tegra_smmu *smmu = as->smmu; + unsigned long offset; + struct page *page; + u32 *pte; + + pte = as_get_pte(as, iova, &page); + if (!pte) + return -ENOMEM; + + *pte = __phys_to_pfn(paddr) | SMMU_PTE_ATTR; + offset = offset_in_page(pte); + + smmu->soc->ops->flush_dcache(page, offset, 4); + smmu_flush_ptc(smmu, page, offset); + smmu_flush_tlb_group(smmu, as->id, iova); + smmu_flush(smmu); + + return 0; +} + +static size_t tegra_smmu_unmap(struct iommu_domain *domain, unsigned long iova, + size_t size) +{ + struct tegra_smmu_as *as = to_smmu_as(domain); + struct tegra_smmu *smmu = as->smmu; + unsigned long offset; + struct page *page; + u32 *pte; + + pte = as_get_pte(as, iova, &page); + if (!pte) + return 0; + + offset = offset_in_page(pte); + as_put_pte(as, iova); + + smmu->soc->ops->flush_dcache(page, offset, 4); + smmu_flush_ptc(smmu, page, offset); + smmu_flush_tlb_group(smmu, as->id, iova); + smmu_flush(smmu); + + return size; +} + +static phys_addr_t tegra_smmu_iova_to_phys(struct iommu_domain *domain, + dma_addr_t iova) +{ + struct tegra_smmu_as *as = to_smmu_as(domain); + struct page *page; + unsigned long pfn; + u32 *pte; + + pte = as_get_pte(as, iova, &page); + pfn = *pte & as->smmu->pfn_mask; + + return PFN_PHYS(pfn); +} + +static struct tegra_smmu *tegra_smmu_find(struct device_node *np) +{ + struct platform_device *pdev; + struct tegra_mc *mc; + + pdev = of_find_device_by_node(np); + if (!pdev) + return NULL; + + mc = platform_get_drvdata(pdev); + if (!mc) + return NULL; + + return mc->smmu; +} + +static int tegra_smmu_add_device(struct device *dev) +{ + struct device_node *np = dev->of_node; + struct of_phandle_args args; + unsigned int index = 0; + + while (of_parse_phandle_with_args(np, "iommus", "#iommu-cells", index, + &args) == 0) { + struct tegra_smmu *smmu; + + smmu = tegra_smmu_find(args.np); + if (smmu) { + /* + * Only a single IOMMU master interface is currently + * supported by the Linux kernel, so abort after the + * first match. + */ + dev->archdata.iommu = smmu; + break; + } + + index++; + } + + return 0; +} + +static void tegra_smmu_remove_device(struct device *dev) +{ + dev->archdata.iommu = NULL; +} + +static const struct iommu_ops tegra_smmu_ops = { + .capable = tegra_smmu_capable, + .domain_alloc = tegra_smmu_domain_alloc, + .domain_free = tegra_smmu_domain_free, + .attach_dev = tegra_smmu_attach_dev, + .detach_dev = tegra_smmu_detach_dev, + .add_device = tegra_smmu_add_device, + .remove_device = tegra_smmu_remove_device, + .map = tegra_smmu_map, + .unmap = tegra_smmu_unmap, + .map_sg = default_iommu_map_sg, + .iova_to_phys = tegra_smmu_iova_to_phys, + + .pgsize_bitmap = SZ_4K, +}; + +static void tegra_smmu_ahb_enable(void) +{ + static const struct of_device_id ahb_match[] = { + { .compatible = "nvidia,tegra30-ahb", }, + { } + }; + struct device_node *ahb; + + ahb = of_find_matching_node(NULL, ahb_match); + if (ahb) { + tegra_ahb_enable_smmu(ahb); + of_node_put(ahb); + } +} + +struct tegra_smmu *tegra_smmu_probe(struct device *dev, + const struct tegra_smmu_soc *soc, + struct tegra_mc *mc) +{ + struct tegra_smmu *smmu; + size_t size; + u32 value; + int err; + + /* This can happen on Tegra20 which doesn't have an SMMU */ + if (!soc) + return NULL; + + smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL); + if (!smmu) + return ERR_PTR(-ENOMEM); + + /* + * This is a bit of a hack. Ideally we'd want to simply return this + * value. However the IOMMU registration process will attempt to add + * all devices to the IOMMU when bus_set_iommu() is called. In order + * not to rely on global variables to track the IOMMU instance, we + * set it here so that it can be looked up from the .add_device() + * callback via the IOMMU device's .drvdata field. + */ + mc->smmu = smmu; + + size = BITS_TO_LONGS(soc->num_asids) * sizeof(long); + + smmu->asids = devm_kzalloc(dev, size, GFP_KERNEL); + if (!smmu->asids) + return ERR_PTR(-ENOMEM); + + mutex_init(&smmu->lock); + + smmu->regs = mc->regs; + smmu->soc = soc; + smmu->dev = dev; + smmu->mc = mc; + + smmu->pfn_mask = BIT_MASK(mc->soc->num_address_bits - PAGE_SHIFT) - 1; + dev_dbg(dev, "address bits: %u, PFN mask: %#lx\n", + mc->soc->num_address_bits, smmu->pfn_mask); + + value = SMMU_PTC_CONFIG_ENABLE | SMMU_PTC_CONFIG_INDEX_MAP(0x3f); + + if (soc->supports_request_limit) + value |= SMMU_PTC_CONFIG_REQ_LIMIT(8); + + smmu_writel(smmu, value, SMMU_PTC_CONFIG); + + value = SMMU_TLB_CONFIG_HIT_UNDER_MISS | + SMMU_TLB_CONFIG_ACTIVE_LINES(0x20); + + if (soc->supports_round_robin_arbitration) + value |= SMMU_TLB_CONFIG_ROUND_ROBIN_ARBITRATION; + + smmu_writel(smmu, value, SMMU_TLB_CONFIG); + + smmu_flush_ptc(smmu, NULL, 0); + smmu_flush_tlb(smmu); + smmu_writel(smmu, SMMU_CONFIG_ENABLE, SMMU_CONFIG); + smmu_flush(smmu); + + tegra_smmu_ahb_enable(); + + err = bus_set_iommu(&platform_bus_type, &tegra_smmu_ops); + if (err < 0) + return ERR_PTR(err); + + return smmu; +} |