summaryrefslogtreecommitdiff
path: root/arch/powerpc/platforms
diff options
context:
space:
mode:
authorAndré Fabian Silva Delgado <emulatorman@parabola.nu>2016-09-11 04:34:46 -0300
committerAndré Fabian Silva Delgado <emulatorman@parabola.nu>2016-09-11 04:34:46 -0300
commit863981e96738983919de841ec669e157e6bdaeb0 (patch)
treed6d89a12e7eb8017837c057935a2271290907f76 /arch/powerpc/platforms
parent8dec7c70575785729a6a9e6719a955e9c545bcab (diff)
Linux-libre 4.7.1-gnupck-4.7.1-gnu
Diffstat (limited to 'arch/powerpc/platforms')
-rw-r--r--arch/powerpc/platforms/512x/clock-commonclk.c2
-rw-r--r--arch/powerpc/platforms/52xx/mpc52xx_gpt.c15
-rw-r--r--arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c6
-rw-r--r--arch/powerpc/platforms/Kconfig.cputype11
-rw-r--r--arch/powerpc/platforms/cell/spu_base.c9
-rw-r--r--arch/powerpc/platforms/cell/spufs/coredump.c5
-rw-r--r--arch/powerpc/platforms/cell/spufs/fault.c4
-rw-r--r--arch/powerpc/platforms/cell/spufs/inode.c2
-rw-r--r--arch/powerpc/platforms/powernv/eeh-powernv.c69
-rw-r--r--arch/powerpc/platforms/powernv/npu-dma.c283
-rw-r--r--arch/powerpc/platforms/powernv/opal-hmi.c8
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c953
-rw-r--r--arch/powerpc/platforms/powernv/pci.c19
-rw-r--r--arch/powerpc/platforms/powernv/pci.h72
-rw-r--r--arch/powerpc/platforms/powernv/setup.c5
-rw-r--r--arch/powerpc/platforms/ps3/htab.c2
-rw-r--r--arch/powerpc/platforms/ps3/spu.c4
-rw-r--r--arch/powerpc/platforms/pseries/eeh_pseries.c28
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-memory.c225
-rw-r--r--arch/powerpc/platforms/pseries/lpar.c20
-rw-r--r--arch/powerpc/platforms/pseries/lparcfg.c3
-rw-r--r--arch/powerpc/platforms/pseries/mobility.c4
-rw-r--r--arch/powerpc/platforms/pseries/msi.c4
-rw-r--r--arch/powerpc/platforms/pseries/pci_dlpar.c32
-rw-r--r--arch/powerpc/platforms/pseries/reconfig.c5
-rw-r--r--arch/powerpc/platforms/pseries/setup.c4
26 files changed, 1044 insertions, 750 deletions
diff --git a/arch/powerpc/platforms/512x/clock-commonclk.c b/arch/powerpc/platforms/512x/clock-commonclk.c
index c50ea76ba..6081fbd75 100644
--- a/arch/powerpc/platforms/512x/clock-commonclk.c
+++ b/arch/powerpc/platforms/512x/clock-commonclk.c
@@ -221,7 +221,7 @@ static bool soc_has_mclk_mux0_canin(void)
/* convenience wrappers around the common clk API */
static inline struct clk *mpc512x_clk_fixed(const char *name, int rate)
{
- return clk_register_fixed_rate(NULL, name, NULL, CLK_IS_ROOT, rate);
+ return clk_register_fixed_rate(NULL, name, NULL, 0, rate);
}
static inline struct clk *mpc512x_clk_factor(
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
index 3048e34db..22645a7c6 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
@@ -278,14 +278,9 @@ mpc52xx_gpt_irq_setup(struct mpc52xx_gpt_priv *gpt, struct device_node *node)
* GPIOLIB hooks
*/
#if defined(CONFIG_GPIOLIB)
-static inline struct mpc52xx_gpt_priv *gc_to_mpc52xx_gpt(struct gpio_chip *gc)
-{
- return container_of(gc, struct mpc52xx_gpt_priv, gc);
-}
-
static int mpc52xx_gpt_gpio_get(struct gpio_chip *gc, unsigned int gpio)
{
- struct mpc52xx_gpt_priv *gpt = gc_to_mpc52xx_gpt(gc);
+ struct mpc52xx_gpt_priv *gpt = gpiochip_get_data(gc);
return (in_be32(&gpt->regs->status) >> 8) & 1;
}
@@ -293,7 +288,7 @@ static int mpc52xx_gpt_gpio_get(struct gpio_chip *gc, unsigned int gpio)
static void
mpc52xx_gpt_gpio_set(struct gpio_chip *gc, unsigned int gpio, int v)
{
- struct mpc52xx_gpt_priv *gpt = gc_to_mpc52xx_gpt(gc);
+ struct mpc52xx_gpt_priv *gpt = gpiochip_get_data(gc);
unsigned long flags;
u32 r;
@@ -307,7 +302,7 @@ mpc52xx_gpt_gpio_set(struct gpio_chip *gc, unsigned int gpio, int v)
static int mpc52xx_gpt_gpio_dir_in(struct gpio_chip *gc, unsigned int gpio)
{
- struct mpc52xx_gpt_priv *gpt = gc_to_mpc52xx_gpt(gc);
+ struct mpc52xx_gpt_priv *gpt = gpiochip_get_data(gc);
unsigned long flags;
dev_dbg(gpt->dev, "%s: gpio:%d\n", __func__, gpio);
@@ -354,9 +349,9 @@ mpc52xx_gpt_gpio_setup(struct mpc52xx_gpt_priv *gpt, struct device_node *node)
clrsetbits_be32(&gpt->regs->mode, MPC52xx_GPT_MODE_MS_MASK,
MPC52xx_GPT_MODE_MS_GPIO);
- rc = gpiochip_add(&gpt->gc);
+ rc = gpiochip_add_data(&gpt->gc, gpt);
if (rc)
- dev_err(gpt->dev, "gpiochip_add() failed; rc=%i\n", rc);
+ dev_err(gpt->dev, "gpiochip_add_data() failed; rc=%i\n", rc);
dev_dbg(gpt->dev, "%s() complete.\n", __func__);
}
diff --git a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
index 15e8021dd..dbcd0303a 100644
--- a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
+++ b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
@@ -16,7 +16,7 @@
#include <linux/device.h>
#include <linux/mutex.h>
#include <linux/i2c.h>
-#include <linux/gpio.h>
+#include <linux/gpio/driver.h>
#include <linux/of.h>
#include <linux/of_gpio.h>
#include <linux/slab.h>
@@ -99,7 +99,7 @@ static void mcu_power_off(void)
static void mcu_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val)
{
- struct mcu *mcu = container_of(gc, struct mcu, gc);
+ struct mcu *mcu = gpiochip_get_data(gc);
u8 bit = 1 << (4 + gpio);
mutex_lock(&mcu->lock);
@@ -136,7 +136,7 @@ static int mcu_gpiochip_add(struct mcu *mcu)
gc->direction_output = mcu_gpio_dir_out;
gc->of_node = np;
- return gpiochip_add(gc);
+ return gpiochip_add_data(gc, mcu);
}
static int mcu_gpiochip_remove(struct mcu *mcu)
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 142dff5e9..77e9b8d59 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -72,7 +72,7 @@ config PPC_BOOK3S_64
select PPC_FPU
select PPC_HAVE_PMU_SUPPORT
select SYS_SUPPORTS_HUGETLBFS
- select HAVE_ARCH_TRANSPARENT_HUGEPAGE if PPC_64K_PAGES
+ select HAVE_ARCH_TRANSPARENT_HUGEPAGE
select ARCH_SUPPORTS_NUMA_BALANCING
select IRQ_WORK
@@ -331,6 +331,15 @@ config PPC_STD_MMU_64
def_bool y
depends on PPC_STD_MMU && PPC64
+config PPC_RADIX_MMU
+ bool "Radix MMU Support"
+ depends on PPC_BOOK3S_64
+ default y
+ help
+ Enable support for the Power ISA 3.0 Radix style MMU. Currently this
+ is only implemented by IBM Power9 CPUs, if you don't have one of them
+ you can probably disable this.
+
config PPC_MMU_NOHASH
def_bool y
depends on !PPC_STD_MMU
diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c
index f7af74f83..3cbe38fad 100644
--- a/arch/powerpc/platforms/cell/spu_base.c
+++ b/arch/powerpc/platforms/cell/spu_base.c
@@ -24,7 +24,7 @@
#include <linux/interrupt.h>
#include <linux/list.h>
-#include <linux/module.h>
+#include <linux/init.h>
#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/wait.h>
@@ -197,7 +197,7 @@ static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr)
(REGION_ID(ea) != USER_REGION_ID)) {
spin_unlock(&spu->register_lock);
- ret = hash_page(ea, _PAGE_PRESENT, 0x300, dsisr);
+ ret = hash_page(ea, _PAGE_PRESENT | _PAGE_READ, 0x300, dsisr);
spin_lock(&spu->register_lock);
if (!ret) {
@@ -805,7 +805,4 @@ static int __init init_spu_base(void)
out:
return ret;
}
-module_init(init_spu_base);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Arnd Bergmann <arndb@de.ibm.com>");
+device_initcall(init_spu_base);
diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c
index be6212ddb..85c85eb3e 100644
--- a/arch/powerpc/platforms/cell/spufs/coredump.c
+++ b/arch/powerpc/platforms/cell/spufs/coredump.c
@@ -137,6 +137,7 @@ static int spufs_arch_write_note(struct spu_context *ctx, int i,
char *name;
char fullname[80], *buf;
struct elf_note en;
+ size_t skip;
buf = (void *)get_zeroed_page(GFP_KERNEL);
if (!buf)
@@ -171,8 +172,8 @@ static int spufs_arch_write_note(struct spu_context *ctx, int i,
if (rc < 0)
goto out;
- if (!dump_skip(cprm,
- roundup(cprm->written - total + sz, 4) - cprm->written))
+ skip = roundup(cprm->pos - total + sz, 4) - cprm->pos;
+ if (!dump_skip(cprm, skip))
goto Eio;
out:
free_page((unsigned long)buf);
diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c
index d98f845ac..e29e4d5af 100644
--- a/arch/powerpc/platforms/cell/spufs/fault.c
+++ b/arch/powerpc/platforms/cell/spufs/fault.c
@@ -141,8 +141,8 @@ int spufs_handle_class1(struct spu_context *ctx)
/* we must not hold the lock when entering copro_handle_mm_fault */
spu_release(ctx);
- access = (_PAGE_PRESENT | _PAGE_USER);
- access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_RW : 0UL;
+ access = (_PAGE_PRESENT | _PAGE_READ);
+ access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_WRITE : 0UL;
local_irq_save(flags);
ret = hash_page(ea, access, 0x300, dsisr);
local_irq_restore(flags);
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index 6ca5f0525..5be15cff7 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -238,7 +238,7 @@ const struct file_operations spufs_context_fops = {
.release = spufs_dir_close,
.llseek = dcache_dir_lseek,
.read = generic_read_dir,
- .iterate = dcache_readdir,
+ .iterate_shared = dcache_readdir,
.fsync = noop_fsync,
};
EXPORT_SYMBOL_GPL(spufs_context_fops);
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 950b3e539..9226df11b 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -75,7 +75,7 @@ static int pnv_eeh_init(void)
* and P7IOC separately. So we should regard
* PE#0 as valid for PHB3 and P7IOC.
*/
- if (phb->ioda.reserved_pe != 0)
+ if (phb->ioda.reserved_pe_idx != 0)
eeh_add_flag(EEH_VALID_PE_ZERO);
break;
@@ -1009,8 +1009,9 @@ static int pnv_eeh_reset_vf_pe(struct eeh_pe *pe, int option)
static int pnv_eeh_reset(struct eeh_pe *pe, int option)
{
struct pci_controller *hose = pe->phb;
+ struct pnv_phb *phb;
struct pci_bus *bus;
- int ret;
+ int64_t rc;
/*
* For PHB reset, we always have complete reset. For those PEs whose
@@ -1026,45 +1027,39 @@ static int pnv_eeh_reset(struct eeh_pe *pe, int option)
* reset. The side effect is that EEH core has to clear the frozen
* state explicitly after BAR restore.
*/
- if (pe->type & EEH_PE_PHB) {
- ret = pnv_eeh_phb_reset(hose, option);
- } else {
- struct pnv_phb *phb;
- s64 rc;
+ if (pe->type & EEH_PE_PHB)
+ return pnv_eeh_phb_reset(hose, option);
- /*
- * The frozen PE might be caused by PAPR error injection
- * registers, which are expected to be cleared after hitting
- * frozen PE as stated in the hardware spec. Unfortunately,
- * that's not true on P7IOC. So we have to clear it manually
- * to avoid recursive EEH errors during recovery.
- */
- phb = hose->private_data;
- if (phb->model == PNV_PHB_MODEL_P7IOC &&
- (option == EEH_RESET_HOT ||
- option == EEH_RESET_FUNDAMENTAL)) {
- rc = opal_pci_reset(phb->opal_id,
- OPAL_RESET_PHB_ERROR,
- OPAL_ASSERT_RESET);
- if (rc != OPAL_SUCCESS) {
- pr_warn("%s: Failure %lld clearing "
- "error injection registers\n",
- __func__, rc);
- return -EIO;
- }
+ /*
+ * The frozen PE might be caused by PAPR error injection
+ * registers, which are expected to be cleared after hitting
+ * frozen PE as stated in the hardware spec. Unfortunately,
+ * that's not true on P7IOC. So we have to clear it manually
+ * to avoid recursive EEH errors during recovery.
+ */
+ phb = hose->private_data;
+ if (phb->model == PNV_PHB_MODEL_P7IOC &&
+ (option == EEH_RESET_HOT ||
+ option == EEH_RESET_FUNDAMENTAL)) {
+ rc = opal_pci_reset(phb->opal_id,
+ OPAL_RESET_PHB_ERROR,
+ OPAL_ASSERT_RESET);
+ if (rc != OPAL_SUCCESS) {
+ pr_warn("%s: Failure %lld clearing error injection registers\n",
+ __func__, rc);
+ return -EIO;
}
-
- bus = eeh_pe_bus_get(pe);
- if (pe->type & EEH_PE_VF)
- ret = pnv_eeh_reset_vf_pe(pe, option);
- else if (pci_is_root_bus(bus) ||
- pci_is_root_bus(bus->parent))
- ret = pnv_eeh_root_reset(hose, option);
- else
- ret = pnv_eeh_bridge_reset(bus->self, option);
}
- return ret;
+ bus = eeh_pe_bus_get(pe);
+ if (pe->type & EEH_PE_VF)
+ return pnv_eeh_reset_vf_pe(pe, option);
+
+ if (pci_is_root_bus(bus) ||
+ pci_is_root_bus(bus->parent))
+ return pnv_eeh_root_reset(hose, option);
+
+ return pnv_eeh_bridge_reset(bus->self, option);
}
/**
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index 7229acd9b..0459e100b 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -12,6 +12,7 @@
#include <linux/export.h>
#include <linux/pci.h>
#include <linux/memblock.h>
+#include <linux/iommu.h>
#include <asm/iommu.h>
#include <asm/pnv-pci.h>
@@ -25,8 +26,6 @@
* Other types of TCE cache invalidation are not functional in the
* hardware.
*/
-#define TCE_KILL_INVAL_ALL PPC_BIT(0)
-
static struct pci_dev *get_pci_dev(struct device_node *dn)
{
return PCI_DN(dn)->pcidev;
@@ -138,22 +137,17 @@ static struct pnv_ioda_pe *get_gpu_pci_dev_and_pe(struct pnv_ioda_pe *npe,
struct pnv_ioda_pe *pe;
struct pci_dn *pdn;
- if (npe->flags & PNV_IODA_PE_PEER) {
- pe = npe->peers[0];
- pdev = pe->pdev;
- } else {
- pdev = pnv_pci_get_gpu_dev(npe->pdev);
- if (!pdev)
- return NULL;
+ pdev = pnv_pci_get_gpu_dev(npe->pdev);
+ if (!pdev)
+ return NULL;
- pdn = pci_get_pdn(pdev);
- if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
- return NULL;
+ pdn = pci_get_pdn(pdev);
+ if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
+ return NULL;
- hose = pci_bus_to_host(pdev->bus);
- phb = hose->private_data;
- pe = &phb->ioda.pe_array[pdn->pe_number];
- }
+ hose = pci_bus_to_host(pdev->bus);
+ phb = hose->private_data;
+ pe = &phb->ioda.pe_array[pdn->pe_number];
if (gpdev)
*gpdev = pdev;
@@ -161,92 +155,70 @@ static struct pnv_ioda_pe *get_gpu_pci_dev_and_pe(struct pnv_ioda_pe *npe,
return pe;
}
-void pnv_npu_tce_invalidate_entire(struct pnv_ioda_pe *npe)
+long pnv_npu_set_window(struct pnv_ioda_pe *npe, int num,
+ struct iommu_table *tbl)
{
struct pnv_phb *phb = npe->phb;
+ int64_t rc;
+ const unsigned long size = tbl->it_indirect_levels ?
+ tbl->it_level_size : tbl->it_size;
+ const __u64 start_addr = tbl->it_offset << tbl->it_page_shift;
+ const __u64 win_size = tbl->it_size << tbl->it_page_shift;
+
+ pe_info(npe, "Setting up window %llx..%llx pg=%lx\n",
+ start_addr, start_addr + win_size - 1,
+ IOMMU_PAGE_SIZE(tbl));
+
+ rc = opal_pci_map_pe_dma_window(phb->opal_id,
+ npe->pe_number,
+ npe->pe_number,
+ tbl->it_indirect_levels + 1,
+ __pa(tbl->it_base),
+ size << 3,
+ IOMMU_PAGE_SIZE(tbl));
+ if (rc) {
+ pe_err(npe, "Failed to configure TCE table, err %lld\n", rc);
+ return rc;
+ }
+ pnv_pci_ioda2_tce_invalidate_entire(phb, false);
- if (WARN_ON(phb->type != PNV_PHB_NPU ||
- !phb->ioda.tce_inval_reg ||
- !(npe->flags & PNV_IODA_PE_DEV)))
- return;
+ /* Add the table to the list so its TCE cache will get invalidated */
+ pnv_pci_link_table_and_group(phb->hose->node, num,
+ tbl, &npe->table_group);
- mb(); /* Ensure previous TCE table stores are visible */
- __raw_writeq(cpu_to_be64(TCE_KILL_INVAL_ALL),
- phb->ioda.tce_inval_reg);
+ return 0;
}
-void pnv_npu_tce_invalidate(struct pnv_ioda_pe *npe,
- struct iommu_table *tbl,
- unsigned long index,
- unsigned long npages,
- bool rm)
+long pnv_npu_unset_window(struct pnv_ioda_pe *npe, int num)
{
struct pnv_phb *phb = npe->phb;
+ int64_t rc;
- /* We can only invalidate the whole cache on NPU */
- unsigned long val = TCE_KILL_INVAL_ALL;
-
- if (WARN_ON(phb->type != PNV_PHB_NPU ||
- !phb->ioda.tce_inval_reg ||
- !(npe->flags & PNV_IODA_PE_DEV)))
- return;
-
- mb(); /* Ensure previous TCE table stores are visible */
- if (rm)
- __raw_rm_writeq(cpu_to_be64(val),
- (__be64 __iomem *) phb->ioda.tce_inval_reg_phys);
- else
- __raw_writeq(cpu_to_be64(val),
- phb->ioda.tce_inval_reg);
-}
-
-void pnv_npu_init_dma_pe(struct pnv_ioda_pe *npe)
-{
- struct pnv_ioda_pe *gpe;
- struct pci_dev *gpdev;
- int i, avail = -1;
-
- if (!npe->pdev || !(npe->flags & PNV_IODA_PE_DEV))
- return;
-
- gpe = get_gpu_pci_dev_and_pe(npe, &gpdev);
- if (!gpe)
- return;
-
- for (i = 0; i < PNV_IODA_MAX_PEER_PES; i++) {
- /* Nothing to do if the PE is already connected. */
- if (gpe->peers[i] == npe)
- return;
+ pe_info(npe, "Removing DMA window\n");
- if (!gpe->peers[i])
- avail = i;
+ rc = opal_pci_map_pe_dma_window(phb->opal_id, npe->pe_number,
+ npe->pe_number,
+ 0/* levels */, 0/* table address */,
+ 0/* table size */, 0/* page size */);
+ if (rc) {
+ pe_err(npe, "Unmapping failed, ret = %lld\n", rc);
+ return rc;
}
+ pnv_pci_ioda2_tce_invalidate_entire(phb, false);
- if (WARN_ON(avail < 0))
- return;
-
- gpe->peers[avail] = npe;
- gpe->flags |= PNV_IODA_PE_PEER;
+ pnv_pci_unlink_table_and_group(npe->table_group.tables[num],
+ &npe->table_group);
- /*
- * We assume that the NPU devices only have a single peer PE
- * (the GPU PCIe device PE).
- */
- npe->peers[0] = gpe;
- npe->flags |= PNV_IODA_PE_PEER;
+ return 0;
}
/*
- * For the NPU we want to point the TCE table at the same table as the
- * real PCI device.
+ * Enables 32 bit DMA on NPU.
*/
-static void pnv_npu_disable_bypass(struct pnv_ioda_pe *npe)
+static void pnv_npu_dma_set_32(struct pnv_ioda_pe *npe)
{
- struct pnv_phb *phb = npe->phb;
struct pci_dev *gpdev;
struct pnv_ioda_pe *gpe;
- void *addr;
- unsigned int size;
int64_t rc;
/*
@@ -260,14 +232,7 @@ static void pnv_npu_disable_bypass(struct pnv_ioda_pe *npe)
if (!gpe)
return;
- addr = (void *)gpe->table_group.tables[0]->it_base;
- size = gpe->table_group.tables[0]->it_size << 3;
- rc = opal_pci_map_pe_dma_window(phb->opal_id, npe->pe_number,
- npe->pe_number, 1, __pa(addr),
- size, 0x1000);
- if (rc != OPAL_SUCCESS)
- pr_warn("%s: Error %lld setting DMA window on PHB#%d-PE#%d\n",
- __func__, rc, phb->hose->global_number, npe->pe_number);
+ rc = pnv_npu_set_window(npe, 0, gpe->table_group.tables[0]);
/*
* We don't initialise npu_pe->tce32_table as we always use
@@ -277,72 +242,120 @@ static void pnv_npu_disable_bypass(struct pnv_ioda_pe *npe)
}
/*
- * Enable/disable bypass mode on the NPU. The NPU only supports one
+ * Enables bypass mode on the NPU. The NPU only supports one
* window per link, so bypass needs to be explicitly enabled or
* disabled. Unlike for a PHB3 bypass and non-bypass modes can't be
* active at the same time.
*/
-int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe, bool enable)
+static int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe)
{
struct pnv_phb *phb = npe->phb;
int64_t rc = 0;
+ phys_addr_t top = memblock_end_of_DRAM();
if (phb->type != PNV_PHB_NPU || !npe->pdev)
return -EINVAL;
- if (enable) {
- /* Enable the bypass window */
- phys_addr_t top = memblock_end_of_DRAM();
-
- npe->tce_bypass_base = 0;
- top = roundup_pow_of_two(top);
- dev_info(&npe->pdev->dev, "Enabling bypass for PE %d\n",
- npe->pe_number);
- rc = opal_pci_map_pe_dma_window_real(phb->opal_id,
- npe->pe_number, npe->pe_number,
- npe->tce_bypass_base, top);
- } else {
- /*
- * Disable the bypass window by replacing it with the
- * TCE32 window.
- */
- pnv_npu_disable_bypass(npe);
- }
+ rc = pnv_npu_unset_window(npe, 0);
+ if (rc != OPAL_SUCCESS)
+ return rc;
+
+ /* Enable the bypass window */
+
+ top = roundup_pow_of_two(top);
+ dev_info(&npe->pdev->dev, "Enabling bypass for PE %d\n",
+ npe->pe_number);
+ rc = opal_pci_map_pe_dma_window_real(phb->opal_id,
+ npe->pe_number, npe->pe_number,
+ 0 /* bypass base */, top);
+
+ if (rc == OPAL_SUCCESS)
+ pnv_pci_ioda2_tce_invalidate_entire(phb, false);
return rc;
}
-int pnv_npu_dma_set_mask(struct pci_dev *npdev, u64 dma_mask)
+void pnv_npu_try_dma_set_bypass(struct pci_dev *gpdev, bool bypass)
{
- struct pci_controller *hose = pci_bus_to_host(npdev->bus);
- struct pnv_phb *phb = hose->private_data;
- struct pci_dn *pdn = pci_get_pdn(npdev);
- struct pnv_ioda_pe *npe, *gpe;
- struct pci_dev *gpdev;
- uint64_t top;
- bool bypass = false;
+ int i;
+ struct pnv_phb *phb;
+ struct pci_dn *pdn;
+ struct pnv_ioda_pe *npe;
+ struct pci_dev *npdev;
- if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
- return -ENXIO;
+ for (i = 0; ; ++i) {
+ npdev = pnv_pci_get_npu_dev(gpdev, i);
- /* We only do bypass if it's enabled on the linked device */
- npe = &phb->ioda.pe_array[pdn->pe_number];
- gpe = get_gpu_pci_dev_and_pe(npe, &gpdev);
- if (!gpe)
- return -ENODEV;
+ if (!npdev)
+ break;
+
+ pdn = pci_get_pdn(npdev);
+ if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
+ return;
+
+ phb = pci_bus_to_host(npdev->bus)->private_data;
+
+ /* We only do bypass if it's enabled on the linked device */
+ npe = &phb->ioda.pe_array[pdn->pe_number];
+
+ if (bypass) {
+ dev_info(&npdev->dev,
+ "Using 64-bit DMA iommu bypass\n");
+ pnv_npu_dma_set_bypass(npe);
+ } else {
+ dev_info(&npdev->dev, "Using 32-bit DMA via iommu\n");
+ pnv_npu_dma_set_32(npe);
+ }
+ }
+}
- if (gpe->tce_bypass_enabled) {
- top = gpe->tce_bypass_base + memblock_end_of_DRAM() - 1;
- bypass = (dma_mask >= top);
+/* Switch ownership from platform code to external user (e.g. VFIO) */
+void pnv_npu_take_ownership(struct pnv_ioda_pe *npe)
+{
+ struct pnv_phb *phb = npe->phb;
+ int64_t rc;
+
+ /*
+ * Note: NPU has just a single TVE in the hardware which means that
+ * while used by the kernel, it can have either 32bit window or
+ * DMA bypass but never both. So we deconfigure 32bit window only
+ * if it was enabled at the moment of ownership change.
+ */
+ if (npe->table_group.tables[0]) {
+ pnv_npu_unset_window(npe, 0);
+ return;
}
- if (bypass)
- dev_info(&npdev->dev, "Using 64-bit DMA iommu bypass\n");
- else
- dev_info(&npdev->dev, "Using 32-bit DMA via iommu\n");
+ /* Disable bypass */
+ rc = opal_pci_map_pe_dma_window_real(phb->opal_id,
+ npe->pe_number, npe->pe_number,
+ 0 /* bypass base */, 0);
+ if (rc) {
+ pe_err(npe, "Failed to disable bypass, err %lld\n", rc);
+ return;
+ }
+ pnv_pci_ioda2_tce_invalidate_entire(npe->phb, false);
+}
- pnv_npu_dma_set_bypass(npe, bypass);
- *npdev->dev.dma_mask = dma_mask;
+struct pnv_ioda_pe *pnv_pci_npu_setup_iommu(struct pnv_ioda_pe *npe)
+{
+ struct pnv_phb *phb = npe->phb;
+ struct pci_bus *pbus = phb->hose->bus;
+ struct pci_dev *npdev, *gpdev = NULL, *gptmp;
+ struct pnv_ioda_pe *gpe = get_gpu_pci_dev_and_pe(npe, &gpdev);
- return 0;
+ if (!gpe || !gpdev)
+ return NULL;
+
+ list_for_each_entry(npdev, &pbus->devices, bus_list) {
+ gptmp = pnv_pci_get_gpu_dev(npdev);
+
+ if (gptmp != gpdev)
+ continue;
+
+ pe_info(gpe, "Attached NPU %s\n", dev_name(&npdev->dev));
+ iommu_group_add_device(gpe->table_group.group, &npdev->dev);
+ }
+
+ return gpe;
}
diff --git a/arch/powerpc/platforms/powernv/opal-hmi.c b/arch/powerpc/platforms/powernv/opal-hmi.c
index d000f4e21..c0a8201cb 100644
--- a/arch/powerpc/platforms/powernv/opal-hmi.c
+++ b/arch/powerpc/platforms/powernv/opal-hmi.c
@@ -150,15 +150,17 @@ static void print_nx_checkstop_reason(const char *level,
static void print_checkstop_reason(const char *level,
struct OpalHMIEvent *hmi_evt)
{
- switch (hmi_evt->u.xstop_error.xstop_type) {
+ uint8_t type = hmi_evt->u.xstop_error.xstop_type;
+ switch (type) {
case CHECKSTOP_TYPE_CORE:
print_core_checkstop_reason(level, hmi_evt);
break;
case CHECKSTOP_TYPE_NX:
print_nx_checkstop_reason(level, hmi_evt);
break;
- case CHECKSTOP_TYPE_UNKNOWN:
- printk("%s Unknown Malfunction Alert.\n", level);
+ default:
+ printk("%s Unknown Malfunction Alert of type %d\n",
+ level, type);
break;
}
}
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index c5baaf3cc..3a5ea8236 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -48,15 +48,16 @@
#include "powernv.h"
#include "pci.h"
-/* 256M DMA window, 4K TCE pages, 8 bytes TCE */
-#define TCE32_TABLE_SIZE ((0x10000000 / 0x1000) * 8)
+#define PNV_IODA1_M64_NUM 16 /* Number of M64 BARs */
+#define PNV_IODA1_M64_SEGS 8 /* Segments per M64 BAR */
+#define PNV_IODA1_DMA32_SEGSIZE 0x10000000
#define POWERNV_IOMMU_DEFAULT_LEVELS 1
#define POWERNV_IOMMU_MAX_LEVELS 5
static void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl);
-static void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
+void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
const char *fmt, ...)
{
struct va_format vaf;
@@ -87,13 +88,6 @@ static void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
va_end(args);
}
-#define pe_err(pe, fmt, ...) \
- pe_level_printk(pe, KERN_ERR, fmt, ##__VA_ARGS__)
-#define pe_warn(pe, fmt, ...) \
- pe_level_printk(pe, KERN_WARNING, fmt, ##__VA_ARGS__)
-#define pe_info(pe, fmt, ...) \
- pe_level_printk(pe, KERN_INFO, fmt, ##__VA_ARGS__)
-
static bool pnv_iommu_bypass_disabled __read_mostly;
static int __init iommu_setup(char *str)
@@ -122,9 +116,17 @@ static inline bool pnv_pci_is_mem_pref_64(unsigned long flags)
(IORESOURCE_MEM_64 | IORESOURCE_PREFETCH));
}
+static struct pnv_ioda_pe *pnv_ioda_init_pe(struct pnv_phb *phb, int pe_no)
+{
+ phb->ioda.pe_array[pe_no].phb = phb;
+ phb->ioda.pe_array[pe_no].pe_number = pe_no;
+
+ return &phb->ioda.pe_array[pe_no];
+}
+
static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no)
{
- if (!(pe_no >= 0 && pe_no < phb->ioda.total_pe)) {
+ if (!(pe_no >= 0 && pe_no < phb->ioda.total_pe_num)) {
pr_warn("%s: Invalid PE %d on PHB#%x\n",
__func__, pe_no, phb->hose->global_number);
return;
@@ -134,32 +136,31 @@ static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no)
pr_debug("%s: PE %d was reserved on PHB#%x\n",
__func__, pe_no, phb->hose->global_number);
- phb->ioda.pe_array[pe_no].phb = phb;
- phb->ioda.pe_array[pe_no].pe_number = pe_no;
+ pnv_ioda_init_pe(phb, pe_no);
}
-static int pnv_ioda_alloc_pe(struct pnv_phb *phb)
+static struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb)
{
unsigned long pe;
do {
pe = find_next_zero_bit(phb->ioda.pe_alloc,
- phb->ioda.total_pe, 0);
- if (pe >= phb->ioda.total_pe)
- return IODA_INVALID_PE;
+ phb->ioda.total_pe_num, 0);
+ if (pe >= phb->ioda.total_pe_num)
+ return NULL;
} while(test_and_set_bit(pe, phb->ioda.pe_alloc));
- phb->ioda.pe_array[pe].phb = phb;
- phb->ioda.pe_array[pe].pe_number = pe;
- return pe;
+ return pnv_ioda_init_pe(phb, pe);
}
-static void pnv_ioda_free_pe(struct pnv_phb *phb, int pe)
+static void pnv_ioda_free_pe(struct pnv_ioda_pe *pe)
{
- WARN_ON(phb->ioda.pe_array[pe].pdev);
+ struct pnv_phb *phb = pe->phb;
+
+ WARN_ON(pe->pdev);
- memset(&phb->ioda.pe_array[pe], 0, sizeof(struct pnv_ioda_pe));
- clear_bit(pe, phb->ioda.pe_alloc);
+ memset(pe, 0, sizeof(struct pnv_ioda_pe));
+ clear_bit(pe->pe_number, phb->ioda.pe_alloc);
}
/* The default M64 BAR is shared by all PEs */
@@ -199,13 +200,13 @@ static int pnv_ioda2_init_m64(struct pnv_phb *phb)
* expected to be 0 or last one of PE capabicity.
*/
r = &phb->hose->mem_resources[1];
- if (phb->ioda.reserved_pe == 0)
+ if (phb->ioda.reserved_pe_idx == 0)
r->start += phb->ioda.m64_segsize;
- else if (phb->ioda.reserved_pe == (phb->ioda.total_pe - 1))
+ else if (phb->ioda.reserved_pe_idx == (phb->ioda.total_pe_num - 1))
r->end -= phb->ioda.m64_segsize;
else
pr_warn(" Cannot strip M64 segment for reserved PE#%d\n",
- phb->ioda.reserved_pe);
+ phb->ioda.reserved_pe_idx);
return 0;
@@ -219,7 +220,7 @@ fail:
return -EIO;
}
-static void pnv_ioda2_reserve_dev_m64_pe(struct pci_dev *pdev,
+static void pnv_ioda_reserve_dev_m64_pe(struct pci_dev *pdev,
unsigned long *pe_bitmap)
{
struct pci_controller *hose = pci_bus_to_host(pdev->bus);
@@ -246,22 +247,80 @@ static void pnv_ioda2_reserve_dev_m64_pe(struct pci_dev *pdev,
}
}
-static void pnv_ioda2_reserve_m64_pe(struct pci_bus *bus,
- unsigned long *pe_bitmap,
- bool all)
+static int pnv_ioda1_init_m64(struct pnv_phb *phb)
+{
+ struct resource *r;
+ int index;
+
+ /*
+ * There are 16 M64 BARs, each of which has 8 segments. So
+ * there are as many M64 segments as the maximum number of
+ * PEs, which is 128.
+ */
+ for (index = 0; index < PNV_IODA1_M64_NUM; index++) {
+ unsigned long base, segsz = phb->ioda.m64_segsize;
+ int64_t rc;
+
+ base = phb->ioda.m64_base +
+ index * PNV_IODA1_M64_SEGS * segsz;
+ rc = opal_pci_set_phb_mem_window(phb->opal_id,
+ OPAL_M64_WINDOW_TYPE, index, base, 0,
+ PNV_IODA1_M64_SEGS * segsz);
+ if (rc != OPAL_SUCCESS) {
+ pr_warn(" Error %lld setting M64 PHB#%d-BAR#%d\n",
+ rc, phb->hose->global_number, index);
+ goto fail;
+ }
+
+ rc = opal_pci_phb_mmio_enable(phb->opal_id,
+ OPAL_M64_WINDOW_TYPE, index,
+ OPAL_ENABLE_M64_SPLIT);
+ if (rc != OPAL_SUCCESS) {
+ pr_warn(" Error %lld enabling M64 PHB#%d-BAR#%d\n",
+ rc, phb->hose->global_number, index);
+ goto fail;
+ }
+ }
+
+ /*
+ * Exclude the segment used by the reserved PE, which
+ * is expected to be 0 or last supported PE#.
+ */
+ r = &phb->hose->mem_resources[1];
+ if (phb->ioda.reserved_pe_idx == 0)
+ r->start += phb->ioda.m64_segsize;
+ else if (phb->ioda.reserved_pe_idx == (phb->ioda.total_pe_num - 1))
+ r->end -= phb->ioda.m64_segsize;
+ else
+ WARN(1, "Wrong reserved PE#%d on PHB#%d\n",
+ phb->ioda.reserved_pe_idx, phb->hose->global_number);
+
+ return 0;
+
+fail:
+ for ( ; index >= 0; index--)
+ opal_pci_phb_mmio_enable(phb->opal_id,
+ OPAL_M64_WINDOW_TYPE, index, OPAL_DISABLE_M64);
+
+ return -EIO;
+}
+
+static void pnv_ioda_reserve_m64_pe(struct pci_bus *bus,
+ unsigned long *pe_bitmap,
+ bool all)
{
struct pci_dev *pdev;
list_for_each_entry(pdev, &bus->devices, bus_list) {
- pnv_ioda2_reserve_dev_m64_pe(pdev, pe_bitmap);
+ pnv_ioda_reserve_dev_m64_pe(pdev, pe_bitmap);
if (all && pdev->subordinate)
- pnv_ioda2_reserve_m64_pe(pdev->subordinate,
- pe_bitmap, all);
+ pnv_ioda_reserve_m64_pe(pdev->subordinate,
+ pe_bitmap, all);
}
}
-static int pnv_ioda2_pick_m64_pe(struct pci_bus *bus, bool all)
+static struct pnv_ioda_pe *pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool all)
{
struct pci_controller *hose = pci_bus_to_host(bus);
struct pnv_phb *phb = hose->private_data;
@@ -271,28 +330,28 @@ static int pnv_ioda2_pick_m64_pe(struct pci_bus *bus, bool all)
/* Root bus shouldn't use M64 */
if (pci_is_root_bus(bus))
- return IODA_INVALID_PE;
+ return NULL;
/* Allocate bitmap */
- size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long));
+ size = _ALIGN_UP(phb->ioda.total_pe_num / 8, sizeof(unsigned long));
pe_alloc = kzalloc(size, GFP_KERNEL);
if (!pe_alloc) {
pr_warn("%s: Out of memory !\n",
__func__);
- return IODA_INVALID_PE;
+ return NULL;
}
/* Figure out reserved PE numbers by the PE */
- pnv_ioda2_reserve_m64_pe(bus, pe_alloc, all);
+ pnv_ioda_reserve_m64_pe(bus, pe_alloc, all);
/*
* the current bus might not own M64 window and that's all
* contributed by its child buses. For the case, we needn't
* pick M64 dependent PE#.
*/
- if (bitmap_empty(pe_alloc, phb->ioda.total_pe)) {
+ if (bitmap_empty(pe_alloc, phb->ioda.total_pe_num)) {
kfree(pe_alloc);
- return IODA_INVALID_PE;
+ return NULL;
}
/*
@@ -301,10 +360,11 @@ static int pnv_ioda2_pick_m64_pe(struct pci_bus *bus, bool all)
*/
master_pe = NULL;
i = -1;
- while ((i = find_next_bit(pe_alloc, phb->ioda.total_pe, i + 1)) <
- phb->ioda.total_pe) {
+ while ((i = find_next_bit(pe_alloc, phb->ioda.total_pe_num, i + 1)) <
+ phb->ioda.total_pe_num) {
pe = &phb->ioda.pe_array[i];
+ phb->ioda.m64_segmap[pe->pe_number] = pe->pe_number;
if (!master_pe) {
pe->flags |= PNV_IODA_PE_MASTER;
INIT_LIST_HEAD(&pe->slaves);
@@ -314,10 +374,30 @@ static int pnv_ioda2_pick_m64_pe(struct pci_bus *bus, bool all)
pe->master = master_pe;
list_add_tail(&pe->list, &master_pe->slaves);
}
+
+ /*
+ * P7IOC supports M64DT, which helps mapping M64 segment
+ * to one particular PE#. However, PHB3 has fixed mapping
+ * between M64 segment and PE#. In order to have same logic
+ * for P7IOC and PHB3, we enforce fixed mapping between M64
+ * segment and PE# on P7IOC.
+ */
+ if (phb->type == PNV_PHB_IODA1) {
+ int64_t rc;
+
+ rc = opal_pci_map_pe_mmio_window(phb->opal_id,
+ pe->pe_number, OPAL_M64_WINDOW_TYPE,
+ pe->pe_number / PNV_IODA1_M64_SEGS,
+ pe->pe_number % PNV_IODA1_M64_SEGS);
+ if (rc != OPAL_SUCCESS)
+ pr_warn("%s: Error %lld mapping M64 for PHB#%d-PE#%d\n",
+ __func__, rc, phb->hose->global_number,
+ pe->pe_number);
+ }
}
kfree(pe_alloc);
- return master_pe->pe_number;
+ return master_pe;
}
static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb)
@@ -328,8 +408,7 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb)
const u32 *r;
u64 pci_addr;
- /* FIXME: Support M64 for P7IOC */
- if (phb->type != PNV_PHB_IODA2) {
+ if (phb->type != PNV_PHB_IODA1 && phb->type != PNV_PHB_IODA2) {
pr_info(" Not support M64 window\n");
return;
}
@@ -355,7 +434,7 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb)
hose->mem_offset[1] = res->start - pci_addr;
phb->ioda.m64_size = resource_size(res);
- phb->ioda.m64_segsize = phb->ioda.m64_size / phb->ioda.total_pe;
+ phb->ioda.m64_segsize = phb->ioda.m64_size / phb->ioda.total_pe_num;
phb->ioda.m64_base = pci_addr;
pr_info(" MEM64 0x%016llx..0x%016llx -> 0x%016llx\n",
@@ -363,9 +442,12 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb)
/* Use last M64 BAR to cover M64 window */
phb->ioda.m64_bar_idx = 15;
- phb->init_m64 = pnv_ioda2_init_m64;
- phb->reserve_m64_pe = pnv_ioda2_reserve_m64_pe;
- phb->pick_m64_pe = pnv_ioda2_pick_m64_pe;
+ if (phb->type == PNV_PHB_IODA1)
+ phb->init_m64 = pnv_ioda1_init_m64;
+ else
+ phb->init_m64 = pnv_ioda2_init_m64;
+ phb->reserve_m64_pe = pnv_ioda_reserve_m64_pe;
+ phb->pick_m64_pe = pnv_ioda_pick_m64_pe;
}
static void pnv_ioda_freeze_pe(struct pnv_phb *phb, int pe_no)
@@ -456,7 +538,7 @@ static int pnv_ioda_get_pe_state(struct pnv_phb *phb, int pe_no)
s64 rc;
/* Sanity check on PE number */
- if (pe_no < 0 || pe_no >= phb->ioda.total_pe)
+ if (pe_no < 0 || pe_no >= phb->ioda.total_pe_num)
return OPAL_EEH_STOPPED_PERM_UNAVAIL;
/*
@@ -808,44 +890,6 @@ out:
return 0;
}
-static void pnv_ioda_link_pe_by_weight(struct pnv_phb *phb,
- struct pnv_ioda_pe *pe)
-{
- struct pnv_ioda_pe *lpe;
-
- list_for_each_entry(lpe, &phb->ioda.pe_dma_list, dma_link) {
- if (lpe->dma_weight < pe->dma_weight) {
- list_add_tail(&pe->dma_link, &lpe->dma_link);
- return;
- }
- }
- list_add_tail(&pe->dma_link, &phb->ioda.pe_dma_list);
-}
-
-static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev)
-{
- /* This is quite simplistic. The "base" weight of a device
- * is 10. 0 means no DMA is to be accounted for it.
- */
-
- /* If it's a bridge, no DMA */
- if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL)
- return 0;
-
- /* Reduce the weight of slow USB controllers */
- if (dev->class == PCI_CLASS_SERIAL_USB_UHCI ||
- dev->class == PCI_CLASS_SERIAL_USB_OHCI ||
- dev->class == PCI_CLASS_SERIAL_USB_EHCI)
- return 3;
-
- /* Increase the weight of RAID (includes Obsidian) */
- if ((dev->class >> 8) == PCI_CLASS_STORAGE_RAID)
- return 15;
-
- /* Default */
- return 10;
-}
-
#ifdef CONFIG_PCI_IOV
static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
{
@@ -919,7 +963,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
struct pnv_phb *phb = hose->private_data;
struct pci_dn *pdn = pci_get_pdn(dev);
struct pnv_ioda_pe *pe;
- int pe_num;
if (!pdn) {
pr_err("%s: Device tree node not associated properly\n",
@@ -929,8 +972,8 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
if (pdn->pe_number != IODA_INVALID_PE)
return NULL;
- pe_num = pnv_ioda_alloc_pe(phb);
- if (pe_num == IODA_INVALID_PE) {
+ pe = pnv_ioda_alloc_pe(phb);
+ if (!pe) {
pr_warning("%s: Not enough PE# available, disabling device\n",
pci_name(dev));
return NULL;
@@ -943,14 +986,12 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
*
* At some point we want to remove the PDN completely anyways
*/
- pe = &phb->ioda.pe_array[pe_num];
pci_dev_get(dev);
pdn->pcidev = dev;
- pdn->pe_number = pe_num;
+ pdn->pe_number = pe->pe_number;
pe->flags = PNV_IODA_PE_DEV;
pe->pdev = dev;
pe->pbus = NULL;
- pe->tce32_seg = -1;
pe->mve_number = -1;
pe->rid = dev->bus->number << 8 | pdn->devfn;
@@ -958,23 +999,15 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
if (pnv_ioda_configure_pe(phb, pe)) {
/* XXX What do we do here ? */
- if (pe_num)
- pnv_ioda_free_pe(phb, pe_num);
+ pnv_ioda_free_pe(pe);
pdn->pe_number = IODA_INVALID_PE;
pe->pdev = NULL;
pci_dev_put(dev);
return NULL;
}
- /* Assign a DMA weight to the device */
- pe->dma_weight = pnv_ioda_dma_weight(dev);
- if (pe->dma_weight != 0) {
- phb->ioda.dma_weight += pe->dma_weight;
- phb->ioda.dma_pe_count++;
- }
-
- /* Link the PE */
- pnv_ioda_link_pe_by_weight(phb, pe);
+ /* Put PE to the list */
+ list_add_tail(&pe->list, &phb->ioda.pe_list);
return pe;
}
@@ -993,7 +1026,6 @@ static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe)
}
pdn->pcidev = dev;
pdn->pe_number = pe->pe_number;
- pe->dma_weight += pnv_ioda_dma_weight(dev);
if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
pnv_ioda_setup_same_PE(dev->subordinate, pe);
}
@@ -1005,49 +1037,44 @@ static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe)
* subordinate PCI devices and buses. The second type of PE is normally
* orgiriated by PCIe-to-PCI bridge or PLX switch downstream ports.
*/
-static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
+static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
{
struct pci_controller *hose = pci_bus_to_host(bus);
struct pnv_phb *phb = hose->private_data;
- struct pnv_ioda_pe *pe;
- int pe_num = IODA_INVALID_PE;
+ struct pnv_ioda_pe *pe = NULL;
/* Check if PE is determined by M64 */
if (phb->pick_m64_pe)
- pe_num = phb->pick_m64_pe(bus, all);
+ pe = phb->pick_m64_pe(bus, all);
/* The PE number isn't pinned by M64 */
- if (pe_num == IODA_INVALID_PE)
- pe_num = pnv_ioda_alloc_pe(phb);
+ if (!pe)
+ pe = pnv_ioda_alloc_pe(phb);
- if (pe_num == IODA_INVALID_PE) {
+ if (!pe) {
pr_warning("%s: Not enough PE# available for PCI bus %04x:%02x\n",
__func__, pci_domain_nr(bus), bus->number);
- return;
+ return NULL;
}
- pe = &phb->ioda.pe_array[pe_num];
pe->flags |= (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS);
pe->pbus = bus;
pe->pdev = NULL;
- pe->tce32_seg = -1;
pe->mve_number = -1;
pe->rid = bus->busn_res.start << 8;
- pe->dma_weight = 0;
if (all)
pe_info(pe, "Secondary bus %d..%d associated with PE#%d\n",
- bus->busn_res.start, bus->busn_res.end, pe_num);
+ bus->busn_res.start, bus->busn_res.end, pe->pe_number);
else
pe_info(pe, "Secondary bus %d associated with PE#%d\n",
- bus->busn_res.start, pe_num);
+ bus->busn_res.start, pe->pe_number);
if (pnv_ioda_configure_pe(phb, pe)) {
/* XXX What do we do here ? */
- if (pe_num)
- pnv_ioda_free_pe(phb, pe_num);
+ pnv_ioda_free_pe(pe);
pe->pbus = NULL;
- return;
+ return NULL;
}
/* Associate it with all child devices */
@@ -1056,16 +1083,7 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
/* Put PE to the list */
list_add_tail(&pe->list, &phb->ioda.pe_list);
- /* Account for one DMA PE if at least one DMA capable device exist
- * below the bridge
- */
- if (pe->dma_weight != 0) {
- phb->ioda.dma_weight += pe->dma_weight;
- phb->ioda.dma_pe_count++;
- }
-
- /* Link the PE */
- pnv_ioda_link_pe_by_weight(phb, pe);
+ return pe;
}
static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev)
@@ -1088,7 +1106,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev)
* same GPU get assigned the same PE.
*/
gpu_pdev = pnv_pci_get_gpu_dev(npu_pdev);
- for (pe_num = 0; pe_num < phb->ioda.total_pe; pe_num++) {
+ for (pe_num = 0; pe_num < phb->ioda.total_pe_num; pe_num++) {
pe = &phb->ioda.pe_array[pe_num];
if (!pe->pdev)
continue;
@@ -1106,7 +1124,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev)
rid = npu_pdev->bus->number << 8 | npu_pdn->devfn;
npu_pdn->pcidev = npu_pdev;
npu_pdn->pe_number = pe_num;
- pe->dma_weight += pnv_ioda_dma_weight(npu_pdev);
phb->ioda.pe_rmap[rid] = pe->pe_number;
/* Map the PE to this link */
@@ -1378,7 +1395,7 @@ static void pnv_ioda_release_vf_PE(struct pci_dev *pdev)
pnv_ioda_deconfigure_pe(phb, pe);
- pnv_ioda_free_pe(phb, pe->pe_number);
+ pnv_ioda_free_pe(pe);
}
}
@@ -1387,6 +1404,7 @@ void pnv_pci_sriov_disable(struct pci_dev *pdev)
struct pci_bus *bus;
struct pci_controller *hose;
struct pnv_phb *phb;
+ struct pnv_ioda_pe *pe;
struct pci_dn *pdn;
struct pci_sriov *iov;
u16 num_vfs, i;
@@ -1411,8 +1429,11 @@ void pnv_pci_sriov_disable(struct pci_dev *pdev)
/* Release PE numbers */
if (pdn->m64_single_mode) {
for (i = 0; i < num_vfs; i++) {
- if (pdn->pe_num_map[i] != IODA_INVALID_PE)
- pnv_ioda_free_pe(phb, pdn->pe_num_map[i]);
+ if (pdn->pe_num_map[i] == IODA_INVALID_PE)
+ continue;
+
+ pe = &phb->ioda.pe_array[pdn->pe_num_map[i]];
+ pnv_ioda_free_pe(pe);
}
} else
bitmap_clear(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs);
@@ -1454,7 +1475,6 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
pe->flags = PNV_IODA_PE_VF;
pe->pbus = NULL;
pe->parent_dev = pdev;
- pe->tce32_seg = -1;
pe->mve_number = -1;
pe->rid = (pci_iov_virtfn_bus(pdev, vf_index) << 8) |
pci_iov_virtfn_devfn(pdev, vf_index);
@@ -1466,8 +1486,7 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
if (pnv_ioda_configure_pe(phb, pe)) {
/* XXX What do we do here ? */
- if (pe_num)
- pnv_ioda_free_pe(phb, pe_num);
+ pnv_ioda_free_pe(pe);
pe->pdev = NULL;
continue;
}
@@ -1486,6 +1505,7 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
struct pci_bus *bus;
struct pci_controller *hose;
struct pnv_phb *phb;
+ struct pnv_ioda_pe *pe;
struct pci_dn *pdn;
int ret;
u16 i;
@@ -1528,18 +1548,20 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
/* Calculate available PE for required VFs */
if (pdn->m64_single_mode) {
for (i = 0; i < num_vfs; i++) {
- pdn->pe_num_map[i] = pnv_ioda_alloc_pe(phb);
- if (pdn->pe_num_map[i] == IODA_INVALID_PE) {
+ pe = pnv_ioda_alloc_pe(phb);
+ if (!pe) {
ret = -EBUSY;
goto m64_failed;
}
+
+ pdn->pe_num_map[i] = pe->pe_number;
}
} else {
mutex_lock(&phb->ioda.pe_alloc_mutex);
*pdn->pe_num_map = bitmap_find_next_zero_area(
- phb->ioda.pe_alloc, phb->ioda.total_pe,
+ phb->ioda.pe_alloc, phb->ioda.total_pe_num,
0, num_vfs, 0);
- if (*pdn->pe_num_map >= phb->ioda.total_pe) {
+ if (*pdn->pe_num_map >= phb->ioda.total_pe_num) {
mutex_unlock(&phb->ioda.pe_alloc_mutex);
dev_info(&pdev->dev, "Failed to enable VF%d\n", num_vfs);
kfree(pdn->pe_num_map);
@@ -1577,8 +1599,11 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
m64_failed:
if (pdn->m64_single_mode) {
for (i = 0; i < num_vfs; i++) {
- if (pdn->pe_num_map[i] != IODA_INVALID_PE)
- pnv_ioda_free_pe(phb, pdn->pe_num_map[i]);
+ if (pdn->pe_num_map[i] == IODA_INVALID_PE)
+ continue;
+
+ pe = &phb->ioda.pe_array[pdn->pe_num_map[i]];
+ pnv_ioda_free_pe(pe);
}
} else
bitmap_clear(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs);
@@ -1640,8 +1665,6 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
struct pnv_ioda_pe *pe;
uint64_t top;
bool bypass = false;
- struct pci_dev *linked_npu_dev;
- int i;
if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
return -ENODEV;;
@@ -1662,15 +1685,7 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
*pdev->dev.dma_mask = dma_mask;
/* Update peer npu devices */
- if (pe->flags & PNV_IODA_PE_PEER)
- for (i = 0; i < PNV_IODA_MAX_PEER_PES; i++) {
- if (!pe->peers[i])
- continue;
-
- linked_npu_dev = pe->peers[i]->pdev;
- if (dma_get_mask(&linked_npu_dev->dev) != dma_mask)
- dma_set_mask(&linked_npu_dev->dev, dma_mask);
- }
+ pnv_npu_try_dma_set_bypass(pdev, bypass);
return 0;
}
@@ -1811,28 +1826,34 @@ static struct iommu_table_ops pnv_ioda1_iommu_ops = {
.get = pnv_tce_get,
};
-static inline void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_ioda_pe *pe)
+#define TCE_KILL_INVAL_ALL PPC_BIT(0)
+#define TCE_KILL_INVAL_PE PPC_BIT(1)
+#define TCE_KILL_INVAL_TCE PPC_BIT(2)
+
+void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_phb *phb, bool rm)
+{
+ const unsigned long val = TCE_KILL_INVAL_ALL;
+
+ mb(); /* Ensure previous TCE table stores are visible */
+ if (rm)
+ __raw_rm_writeq(cpu_to_be64(val),
+ (__be64 __iomem *)
+ phb->ioda.tce_inval_reg_phys);
+ else
+ __raw_writeq(cpu_to_be64(val), phb->ioda.tce_inval_reg);
+}
+
+static inline void pnv_pci_ioda2_tce_invalidate_pe(struct pnv_ioda_pe *pe)
{
/* 01xb - invalidate TCEs that match the specified PE# */
- unsigned long val = (0x4ull << 60) | (pe->pe_number & 0xFF);
+ unsigned long val = TCE_KILL_INVAL_PE | (pe->pe_number & 0xFF);
struct pnv_phb *phb = pe->phb;
- struct pnv_ioda_pe *npe;
- int i;
if (!phb->ioda.tce_inval_reg)
return;
mb(); /* Ensure above stores are visible */
__raw_writeq(cpu_to_be64(val), phb->ioda.tce_inval_reg);
-
- if (pe->flags & PNV_IODA_PE_PEER)
- for (i = 0; i < PNV_IODA_MAX_PEER_PES; i++) {
- npe = pe->peers[i];
- if (!npe || npe->phb->type != PNV_PHB_NPU)
- continue;
-
- pnv_npu_tce_invalidate_entire(npe);
- }
}
static void pnv_pci_ioda2_do_tce_invalidate(unsigned pe_number, bool rm,
@@ -1842,7 +1863,7 @@ static void pnv_pci_ioda2_do_tce_invalidate(unsigned pe_number, bool rm,
unsigned long start, end, inc;
/* We'll invalidate DMA address in PE scope */
- start = 0x2ull << 60;
+ start = TCE_KILL_INVAL_TCE;
start |= (pe_number & 0xFF);
end = start;
@@ -1867,28 +1888,24 @@ static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl,
struct iommu_table_group_link *tgl;
list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) {
- struct pnv_ioda_pe *npe;
struct pnv_ioda_pe *pe = container_of(tgl->table_group,
struct pnv_ioda_pe, table_group);
__be64 __iomem *invalidate = rm ?
(__be64 __iomem *)pe->phb->ioda.tce_inval_reg_phys :
pe->phb->ioda.tce_inval_reg;
- int i;
+ if (pe->phb->type == PNV_PHB_NPU) {
+ /*
+ * The NVLink hardware does not support TCE kill
+ * per TCE entry so we have to invalidate
+ * the entire cache for it.
+ */
+ pnv_pci_ioda2_tce_invalidate_entire(pe->phb, rm);
+ continue;
+ }
pnv_pci_ioda2_do_tce_invalidate(pe->pe_number, rm,
invalidate, tbl->it_page_shift,
index, npages);
-
- if (pe->flags & PNV_IODA_PE_PEER)
- /* Invalidate PEs using the same TCE table */
- for (i = 0; i < PNV_IODA_MAX_PEER_PES; i++) {
- npe = pe->peers[i];
- if (!npe || npe->phb->type != PNV_PHB_NPU)
- continue;
-
- pnv_npu_tce_invalidate(npe, tbl, index,
- npages, rm);
- }
}
}
@@ -1945,56 +1962,140 @@ static struct iommu_table_ops pnv_ioda2_iommu_ops = {
.free = pnv_ioda2_table_free,
};
-static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
- struct pnv_ioda_pe *pe, unsigned int base,
- unsigned int segs)
+static int pnv_pci_ioda_dev_dma_weight(struct pci_dev *dev, void *data)
+{
+ unsigned int *weight = (unsigned int *)data;
+
+ /* This is quite simplistic. The "base" weight of a device
+ * is 10. 0 means no DMA is to be accounted for it.
+ */
+ if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL)
+ return 0;
+
+ if (dev->class == PCI_CLASS_SERIAL_USB_UHCI ||
+ dev->class == PCI_CLASS_SERIAL_USB_OHCI ||
+ dev->class == PCI_CLASS_SERIAL_USB_EHCI)
+ *weight += 3;
+ else if ((dev->class >> 8) == PCI_CLASS_STORAGE_RAID)
+ *weight += 15;
+ else
+ *weight += 10;
+
+ return 0;
+}
+
+static unsigned int pnv_pci_ioda_pe_dma_weight(struct pnv_ioda_pe *pe)
+{
+ unsigned int weight = 0;
+
+ /* SRIOV VF has same DMA32 weight as its PF */
+#ifdef CONFIG_PCI_IOV
+ if ((pe->flags & PNV_IODA_PE_VF) && pe->parent_dev) {
+ pnv_pci_ioda_dev_dma_weight(pe->parent_dev, &weight);
+ return weight;
+ }
+#endif
+
+ if ((pe->flags & PNV_IODA_PE_DEV) && pe->pdev) {
+ pnv_pci_ioda_dev_dma_weight(pe->pdev, &weight);
+ } else if ((pe->flags & PNV_IODA_PE_BUS) && pe->pbus) {
+ struct pci_dev *pdev;
+
+ list_for_each_entry(pdev, &pe->pbus->devices, bus_list)
+ pnv_pci_ioda_dev_dma_weight(pdev, &weight);
+ } else if ((pe->flags & PNV_IODA_PE_BUS_ALL) && pe->pbus) {
+ pci_walk_bus(pe->pbus, pnv_pci_ioda_dev_dma_weight, &weight);
+ }
+
+ return weight;
+}
+
+static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb,
+ struct pnv_ioda_pe *pe)
{
struct page *tce_mem = NULL;
struct iommu_table *tbl;
- unsigned int i;
+ unsigned int weight, total_weight = 0;
+ unsigned int tce32_segsz, base, segs, avail, i;
int64_t rc;
void *addr;
/* XXX FIXME: Handle 64-bit only DMA devices */
/* XXX FIXME: Provide 64-bit DMA facilities & non-4K TCE tables etc.. */
/* XXX FIXME: Allocate multi-level tables on PHB3 */
+ weight = pnv_pci_ioda_pe_dma_weight(pe);
+ if (!weight)
+ return;
- /* We shouldn't already have a 32-bit DMA associated */
- if (WARN_ON(pe->tce32_seg >= 0))
+ pci_walk_bus(phb->hose->bus, pnv_pci_ioda_dev_dma_weight,
+ &total_weight);
+ segs = (weight * phb->ioda.dma32_count) / total_weight;
+ if (!segs)
+ segs = 1;
+
+ /*
+ * Allocate contiguous DMA32 segments. We begin with the expected
+ * number of segments. With one more attempt, the number of DMA32
+ * segments to be allocated is decreased by one until one segment
+ * is allocated successfully.
+ */
+ do {
+ for (base = 0; base <= phb->ioda.dma32_count - segs; base++) {
+ for (avail = 0, i = base; i < base + segs; i++) {
+ if (phb->ioda.dma32_segmap[i] ==
+ IODA_INVALID_PE)
+ avail++;
+ }
+
+ if (avail == segs)
+ goto found;
+ }
+ } while (--segs);
+
+ if (!segs) {
+ pe_warn(pe, "No available DMA32 segments\n");
return;
+ }
+found:
tbl = pnv_pci_table_alloc(phb->hose->node);
iommu_register_group(&pe->table_group, phb->hose->global_number,
pe->pe_number);
pnv_pci_link_table_and_group(phb->hose->node, 0, tbl, &pe->table_group);
/* Grab a 32-bit TCE table */
- pe->tce32_seg = base;
+ pe_info(pe, "DMA weight %d (%d), assigned (%d) %d DMA32 segments\n",
+ weight, total_weight, base, segs);
pe_info(pe, " Setting up 32-bit TCE table at %08x..%08x\n",
- (base << 28), ((base + segs) << 28) - 1);
+ base * PNV_IODA1_DMA32_SEGSIZE,
+ (base + segs) * PNV_IODA1_DMA32_SEGSIZE - 1);
/* XXX Currently, we allocate one big contiguous table for the
* TCEs. We only really need one chunk per 256M of TCE space
* (ie per segment) but that's an optimization for later, it
* requires some added smarts with our get/put_tce implementation
+ *
+ * Each TCE page is 4KB in size and each TCE entry occupies 8
+ * bytes
*/
+ tce32_segsz = PNV_IODA1_DMA32_SEGSIZE >> (IOMMU_PAGE_SHIFT_4K - 3);
tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL,
- get_order(TCE32_TABLE_SIZE * segs));
+ get_order(tce32_segsz * segs));
if (!tce_mem) {
pe_err(pe, " Failed to allocate a 32-bit TCE memory\n");
goto fail;
}
addr = page_address(tce_mem);
- memset(addr, 0, TCE32_TABLE_SIZE * segs);
+ memset(addr, 0, tce32_segsz * segs);
/* Configure HW */
for (i = 0; i < segs; i++) {
rc = opal_pci_map_pe_dma_window(phb->opal_id,
pe->pe_number,
base + i, 1,
- __pa(addr) + TCE32_TABLE_SIZE * i,
- TCE32_TABLE_SIZE, 0x1000);
+ __pa(addr) + tce32_segsz * i,
+ tce32_segsz, IOMMU_PAGE_SIZE_4K);
if (rc) {
pe_err(pe, " Failed to configure 32-bit TCE table,"
" err %ld\n", rc);
@@ -2002,9 +2103,14 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
}
}
+ /* Setup DMA32 segment mapping */
+ for (i = base; i < base + segs; i++)
+ phb->ioda.dma32_segmap[i] = pe->pe_number;
+
/* Setup linux iommu table */
- pnv_pci_setup_iommu_table(tbl, addr, TCE32_TABLE_SIZE * segs,
- base << 28, IOMMU_PAGE_SHIFT_4K);
+ pnv_pci_setup_iommu_table(tbl, addr, tce32_segsz * segs,
+ base * PNV_IODA1_DMA32_SEGSIZE,
+ IOMMU_PAGE_SHIFT_4K);
/* OPAL variant of P7IOC SW invalidated TCEs */
if (phb->ioda.tce_inval_reg)
@@ -2031,10 +2137,8 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
return;
fail:
/* XXX Failure: Try to fallback to 64-bit only ? */
- if (pe->tce32_seg >= 0)
- pe->tce32_seg = -1;
if (tce_mem)
- __free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs));
+ __free_pages(tce_mem, get_order(tce32_segsz * segs));
if (tbl) {
pnv_pci_unlink_table_and_group(tbl, &pe->table_group);
iommu_free_table(tbl, "pnv");
@@ -2075,7 +2179,7 @@ static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group,
pnv_pci_link_table_and_group(phb->hose->node, num,
tbl, &pe->table_group);
- pnv_pci_ioda2_tce_invalidate_entire(pe);
+ pnv_pci_ioda2_tce_invalidate_pe(pe);
return 0;
}
@@ -2219,7 +2323,7 @@ static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group,
if (ret)
pe_warn(pe, "Unmapping failed, ret = %ld\n", ret);
else
- pnv_pci_ioda2_tce_invalidate_entire(pe);
+ pnv_pci_ioda2_tce_invalidate_pe(pe);
pnv_pci_unlink_table_and_group(table_group->tables[num], table_group);
@@ -2288,6 +2392,116 @@ static struct iommu_table_group_ops pnv_pci_ioda2_ops = {
.take_ownership = pnv_ioda2_take_ownership,
.release_ownership = pnv_ioda2_release_ownership,
};
+
+static int gpe_table_group_to_npe_cb(struct device *dev, void *opaque)
+{
+ struct pci_controller *hose;
+ struct pnv_phb *phb;
+ struct pnv_ioda_pe **ptmppe = opaque;
+ struct pci_dev *pdev = container_of(dev, struct pci_dev, dev);
+ struct pci_dn *pdn = pci_get_pdn(pdev);
+
+ if (!pdn || pdn->pe_number == IODA_INVALID_PE)
+ return 0;
+
+ hose = pci_bus_to_host(pdev->bus);
+ phb = hose->private_data;
+ if (phb->type != PNV_PHB_NPU)
+ return 0;
+
+ *ptmppe = &phb->ioda.pe_array[pdn->pe_number];
+
+ return 1;
+}
+
+/*
+ * This returns PE of associated NPU.
+ * This assumes that NPU is in the same IOMMU group with GPU and there is
+ * no other PEs.
+ */
+static struct pnv_ioda_pe *gpe_table_group_to_npe(
+ struct iommu_table_group *table_group)
+{
+ struct pnv_ioda_pe *npe = NULL;
+ int ret = iommu_group_for_each_dev(table_group->group, &npe,
+ gpe_table_group_to_npe_cb);
+
+ BUG_ON(!ret || !npe);
+
+ return npe;
+}
+
+static long pnv_pci_ioda2_npu_set_window(struct iommu_table_group *table_group,
+ int num, struct iommu_table *tbl)
+{
+ long ret = pnv_pci_ioda2_set_window(table_group, num, tbl);
+
+ if (ret)
+ return ret;
+
+ ret = pnv_npu_set_window(gpe_table_group_to_npe(table_group), num, tbl);
+ if (ret)
+ pnv_pci_ioda2_unset_window(table_group, num);
+
+ return ret;
+}
+
+static long pnv_pci_ioda2_npu_unset_window(
+ struct iommu_table_group *table_group,
+ int num)
+{
+ long ret = pnv_pci_ioda2_unset_window(table_group, num);
+
+ if (ret)
+ return ret;
+
+ return pnv_npu_unset_window(gpe_table_group_to_npe(table_group), num);
+}
+
+static void pnv_ioda2_npu_take_ownership(struct iommu_table_group *table_group)
+{
+ /*
+ * Detach NPU first as pnv_ioda2_take_ownership() will destroy
+ * the iommu_table if 32bit DMA is enabled.
+ */
+ pnv_npu_take_ownership(gpe_table_group_to_npe(table_group));
+ pnv_ioda2_take_ownership(table_group);
+}
+
+static struct iommu_table_group_ops pnv_pci_ioda2_npu_ops = {
+ .get_table_size = pnv_pci_ioda2_get_table_size,
+ .create_table = pnv_pci_ioda2_create_table,
+ .set_window = pnv_pci_ioda2_npu_set_window,
+ .unset_window = pnv_pci_ioda2_npu_unset_window,
+ .take_ownership = pnv_ioda2_npu_take_ownership,
+ .release_ownership = pnv_ioda2_release_ownership,
+};
+
+static void pnv_pci_ioda_setup_iommu_api(void)
+{
+ struct pci_controller *hose, *tmp;
+ struct pnv_phb *phb;
+ struct pnv_ioda_pe *pe, *gpe;
+
+ /*
+ * Now we have all PHBs discovered, time to add NPU devices to
+ * the corresponding IOMMU groups.
+ */
+ list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+ phb = hose->private_data;
+
+ if (phb->type != PNV_PHB_NPU)
+ continue;
+
+ list_for_each_entry(pe, &phb->ioda.pe_list, list) {
+ gpe = pnv_pci_npu_setup_iommu(pe);
+ if (gpe)
+ gpe->table_group.ops = &pnv_pci_ioda2_npu_ops;
+ }
+ }
+}
+#else /* !CONFIG_IOMMU_API */
+static void pnv_pci_ioda_setup_iommu_api(void) { };
#endif
static void pnv_pci_ioda_setup_opal_tce_kill(struct pnv_phb *phb)
@@ -2443,10 +2657,6 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
{
int64_t rc;
- /* We shouldn't already have a 32-bit DMA associated */
- if (WARN_ON(pe->tce32_seg >= 0))
- return;
-
/* TVE #1 is selected by PCI address bit 59 */
pe->tce_bypass_base = 1ull << 59;
@@ -2454,7 +2664,6 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
pe->pe_number);
/* The PE will reserve all possible 32-bits space */
- pe->tce32_seg = 0;
pe_info(pe, "Setting up 32-bit TCE table at 0..%08x\n",
phb->ioda.m32_pci_base);
@@ -2470,11 +2679,8 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
#endif
rc = pnv_pci_ioda2_setup_default_config(pe);
- if (rc) {
- if (pe->tce32_seg >= 0)
- pe->tce32_seg = -1;
+ if (rc)
return;
- }
if (pe->flags & PNV_IODA_PE_DEV)
iommu_add_device(&pe->pdev->dev);
@@ -2485,47 +2691,24 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
static void pnv_ioda_setup_dma(struct pnv_phb *phb)
{
struct pci_controller *hose = phb->hose;
- unsigned int residual, remaining, segs, tw, base;
struct pnv_ioda_pe *pe;
+ unsigned int weight;
/* If we have more PE# than segments available, hand out one
* per PE until we run out and let the rest fail. If not,
* then we assign at least one segment per PE, plus more based
* on the amount of devices under that PE
*/
- if (phb->ioda.dma_pe_count > phb->ioda.tce32_count)
- residual = 0;
- else
- residual = phb->ioda.tce32_count -
- phb->ioda.dma_pe_count;
-
- pr_info("PCI: Domain %04x has %ld available 32-bit DMA segments\n",
- hose->global_number, phb->ioda.tce32_count);
- pr_info("PCI: %d PE# for a total weight of %d\n",
- phb->ioda.dma_pe_count, phb->ioda.dma_weight);
+ pr_info("PCI: Domain %04x has %d available 32-bit DMA segments\n",
+ hose->global_number, phb->ioda.dma32_count);
pnv_pci_ioda_setup_opal_tce_kill(phb);
- /* Walk our PE list and configure their DMA segments, hand them
- * out one base segment plus any residual segments based on
- * weight
- */
- remaining = phb->ioda.tce32_count;
- tw = phb->ioda.dma_weight;
- base = 0;
- list_for_each_entry(pe, &phb->ioda.pe_dma_list, dma_link) {
- if (!pe->dma_weight)
- continue;
- if (!remaining) {
- pe_warn(pe, "No DMA32 resources available\n");
+ /* Walk our PE list and configure their DMA segments */
+ list_for_each_entry(pe, &phb->ioda.pe_list, list) {
+ weight = pnv_pci_ioda_pe_dma_weight(pe);
+ if (!weight)
continue;
- }
- segs = 1;
- if (residual) {
- segs += ((pe->dma_weight * residual) + (tw / 2)) / tw;
- if (segs > remaining)
- segs = remaining;
- }
/*
* For IODA2 compliant PHB3, we needn't care about the weight.
@@ -2533,12 +2716,9 @@ static void pnv_ioda_setup_dma(struct pnv_phb *phb)
* the specific PE.
*/
if (phb->type == PNV_PHB_IODA1) {
- pe_info(pe, "DMA weight %d, assigned %d DMA32 segments\n",
- pe->dma_weight, segs);
- pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs);
+ pnv_pci_ioda1_setup_dma_pe(phb, pe);
} else if (phb->type == PNV_PHB_IODA2) {
pe_info(pe, "Assign DMA32 space\n");
- segs = 0;
pnv_pci_ioda2_setup_dma_pe(phb, pe);
} else if (phb->type == PNV_PHB_NPU) {
/*
@@ -2548,9 +2728,6 @@ static void pnv_ioda_setup_dma(struct pnv_phb *phb)
* as the PHB3 TVT.
*/
}
-
- remaining -= segs;
- base += segs;
}
}
@@ -2858,7 +3035,7 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
pdn->m64_single_mode = false;
total_vfs = pci_sriov_get_totalvfs(pdev);
- mul = phb->ioda.total_pe;
+ mul = phb->ioda.total_pe_num;
total_vf_bar_sz = 0;
for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
@@ -2929,19 +3106,72 @@ truncate_iov:
}
#endif /* CONFIG_PCI_IOV */
+static void pnv_ioda_setup_pe_res(struct pnv_ioda_pe *pe,
+ struct resource *res)
+{
+ struct pnv_phb *phb = pe->phb;
+ struct pci_bus_region region;
+ int index;
+ int64_t rc;
+
+ if (!res || !res->flags || res->start > res->end)
+ return;
+
+ if (res->flags & IORESOURCE_IO) {
+ region.start = res->start - phb->ioda.io_pci_base;
+ region.end = res->end - phb->ioda.io_pci_base;
+ index = region.start / phb->ioda.io_segsize;
+
+ while (index < phb->ioda.total_pe_num &&
+ region.start <= region.end) {
+ phb->ioda.io_segmap[index] = pe->pe_number;
+ rc = opal_pci_map_pe_mmio_window(phb->opal_id,
+ pe->pe_number, OPAL_IO_WINDOW_TYPE, 0, index);
+ if (rc != OPAL_SUCCESS) {
+ pr_err("%s: Error %lld mapping IO segment#%d to PE#%d\n",
+ __func__, rc, index, pe->pe_number);
+ break;
+ }
+
+ region.start += phb->ioda.io_segsize;
+ index++;
+ }
+ } else if ((res->flags & IORESOURCE_MEM) &&
+ !pnv_pci_is_mem_pref_64(res->flags)) {
+ region.start = res->start -
+ phb->hose->mem_offset[0] -
+ phb->ioda.m32_pci_base;
+ region.end = res->end -
+ phb->hose->mem_offset[0] -
+ phb->ioda.m32_pci_base;
+ index = region.start / phb->ioda.m32_segsize;
+
+ while (index < phb->ioda.total_pe_num &&
+ region.start <= region.end) {
+ phb->ioda.m32_segmap[index] = pe->pe_number;
+ rc = opal_pci_map_pe_mmio_window(phb->opal_id,
+ pe->pe_number, OPAL_M32_WINDOW_TYPE, 0, index);
+ if (rc != OPAL_SUCCESS) {
+ pr_err("%s: Error %lld mapping M32 segment#%d to PE#%d",
+ __func__, rc, index, pe->pe_number);
+ break;
+ }
+
+ region.start += phb->ioda.m32_segsize;
+ index++;
+ }
+ }
+}
+
/*
* This function is supposed to be called on basis of PE from top
* to bottom style. So the the I/O or MMIO segment assigned to
* parent PE could be overrided by its child PEs if necessary.
*/
-static void pnv_ioda_setup_pe_seg(struct pci_controller *hose,
- struct pnv_ioda_pe *pe)
+static void pnv_ioda_setup_pe_seg(struct pnv_ioda_pe *pe)
{
- struct pnv_phb *phb = hose->private_data;
- struct pci_bus_region region;
- struct resource *res;
- int i, index;
- int rc;
+ struct pci_dev *pdev;
+ int i;
/*
* NOTE: We only care PCI bus based PE for now. For PCI
@@ -2950,57 +3180,20 @@ static void pnv_ioda_setup_pe_seg(struct pci_controller *hose,
*/
BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)));
- pci_bus_for_each_resource(pe->pbus, res, i) {
- if (!res || !res->flags ||
- res->start > res->end)
- continue;
+ list_for_each_entry(pdev, &pe->pbus->devices, bus_list) {
+ for (i = 0; i <= PCI_ROM_RESOURCE; i++)
+ pnv_ioda_setup_pe_res(pe, &pdev->resource[i]);
- if (res->flags & IORESOURCE_IO) {
- region.start = res->start - phb->ioda.io_pci_base;
- region.end = res->end - phb->ioda.io_pci_base;
- index = region.start / phb->ioda.io_segsize;
-
- while (index < phb->ioda.total_pe &&
- region.start <= region.end) {
- phb->ioda.io_segmap[index] = pe->pe_number;
- rc = opal_pci_map_pe_mmio_window(phb->opal_id,
- pe->pe_number, OPAL_IO_WINDOW_TYPE, 0, index);
- if (rc != OPAL_SUCCESS) {
- pr_err("%s: OPAL error %d when mapping IO "
- "segment #%d to PE#%d\n",
- __func__, rc, index, pe->pe_number);
- break;
- }
-
- region.start += phb->ioda.io_segsize;
- index++;
- }
- } else if ((res->flags & IORESOURCE_MEM) &&
- !pnv_pci_is_mem_pref_64(res->flags)) {
- region.start = res->start -
- hose->mem_offset[0] -
- phb->ioda.m32_pci_base;
- region.end = res->end -
- hose->mem_offset[0] -
- phb->ioda.m32_pci_base;
- index = region.start / phb->ioda.m32_segsize;
-
- while (index < phb->ioda.total_pe &&
- region.start <= region.end) {
- phb->ioda.m32_segmap[index] = pe->pe_number;
- rc = opal_pci_map_pe_mmio_window(phb->opal_id,
- pe->pe_number, OPAL_M32_WINDOW_TYPE, 0, index);
- if (rc != OPAL_SUCCESS) {
- pr_err("%s: OPAL error %d when mapping M32 "
- "segment#%d to PE#%d",
- __func__, rc, index, pe->pe_number);
- break;
- }
-
- region.start += phb->ioda.m32_segsize;
- index++;
- }
- }
+ /*
+ * If the PE contains all subordinate PCI buses, the
+ * windows of the child bridges should be mapped to
+ * the PE as well.
+ */
+ if (!(pe->flags & PNV_IODA_PE_BUS_ALL) || !pci_is_bridge(pdev))
+ continue;
+ for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++)
+ pnv_ioda_setup_pe_res(pe,
+ &pdev->resource[PCI_BRIDGE_RESOURCES + i]);
}
}
@@ -3018,7 +3211,7 @@ static void pnv_pci_ioda_setup_seg(void)
continue;
list_for_each_entry(pe, &phb->ioda.pe_list, list) {
- pnv_ioda_setup_pe_seg(hose, pe);
+ pnv_ioda_setup_pe_seg(pe);
}
}
}
@@ -3035,6 +3228,8 @@ static void pnv_pci_ioda_setup_DMA(void)
phb = hose->private_data;
phb->initialized = 1;
}
+
+ pnv_pci_ioda_setup_iommu_api();
}
static void pnv_pci_ioda_create_dbgfs(void)
@@ -3056,27 +3251,6 @@ static void pnv_pci_ioda_create_dbgfs(void)
#endif /* CONFIG_DEBUG_FS */
}
-static void pnv_npu_ioda_fixup(void)
-{
- bool enable_bypass;
- struct pci_controller *hose, *tmp;
- struct pnv_phb *phb;
- struct pnv_ioda_pe *pe;
-
- list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
- phb = hose->private_data;
- if (phb->type != PNV_PHB_NPU)
- continue;
-
- list_for_each_entry(pe, &phb->ioda.pe_dma_list, dma_link) {
- enable_bypass = dma_get_mask(&pe->pdev->dev) ==
- DMA_BIT_MASK(64);
- pnv_npu_init_dma_pe(pe);
- pnv_npu_dma_set_bypass(pe, enable_bypass);
- }
- }
-}
-
static void pnv_pci_ioda_fixup(void)
{
pnv_pci_ioda_setup_PEs();
@@ -3089,9 +3263,6 @@ static void pnv_pci_ioda_fixup(void)
eeh_init();
eeh_addr_cache_build();
#endif
-
- /* Link NPU IODA tables to their PCI devices. */
- pnv_npu_ioda_fixup();
}
/*
@@ -3195,12 +3366,6 @@ static bool pnv_pci_enable_device_hook(struct pci_dev *dev)
return true;
}
-static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus,
- u32 devfn)
-{
- return phb->ioda.pe_rmap[(bus->number << 8) | devfn];
-}
-
static void pnv_pci_ioda_shutdown(struct pci_controller *hose)
{
struct pnv_phb *phb = hose->private_data;
@@ -3210,31 +3375,39 @@ static void pnv_pci_ioda_shutdown(struct pci_controller *hose)
}
static const struct pci_controller_ops pnv_pci_ioda_controller_ops = {
- .dma_dev_setup = pnv_pci_dma_dev_setup,
- .dma_bus_setup = pnv_pci_dma_bus_setup,
+ .dma_dev_setup = pnv_pci_dma_dev_setup,
+ .dma_bus_setup = pnv_pci_dma_bus_setup,
#ifdef CONFIG_PCI_MSI
- .setup_msi_irqs = pnv_setup_msi_irqs,
- .teardown_msi_irqs = pnv_teardown_msi_irqs,
+ .setup_msi_irqs = pnv_setup_msi_irqs,
+ .teardown_msi_irqs = pnv_teardown_msi_irqs,
#endif
- .enable_device_hook = pnv_pci_enable_device_hook,
- .window_alignment = pnv_pci_window_alignment,
- .reset_secondary_bus = pnv_pci_reset_secondary_bus,
- .dma_set_mask = pnv_pci_ioda_dma_set_mask,
- .dma_get_required_mask = pnv_pci_ioda_dma_get_required_mask,
- .shutdown = pnv_pci_ioda_shutdown,
+ .enable_device_hook = pnv_pci_enable_device_hook,
+ .window_alignment = pnv_pci_window_alignment,
+ .reset_secondary_bus = pnv_pci_reset_secondary_bus,
+ .dma_set_mask = pnv_pci_ioda_dma_set_mask,
+ .dma_get_required_mask = pnv_pci_ioda_dma_get_required_mask,
+ .shutdown = pnv_pci_ioda_shutdown,
};
+static int pnv_npu_dma_set_mask(struct pci_dev *npdev, u64 dma_mask)
+{
+ dev_err_once(&npdev->dev,
+ "%s operation unsupported for NVLink devices\n",
+ __func__);
+ return -EPERM;
+}
+
static const struct pci_controller_ops pnv_npu_ioda_controller_ops = {
- .dma_dev_setup = pnv_pci_dma_dev_setup,
+ .dma_dev_setup = pnv_pci_dma_dev_setup,
#ifdef CONFIG_PCI_MSI
- .setup_msi_irqs = pnv_setup_msi_irqs,
- .teardown_msi_irqs = pnv_teardown_msi_irqs,
+ .setup_msi_irqs = pnv_setup_msi_irqs,
+ .teardown_msi_irqs = pnv_teardown_msi_irqs,
#endif
- .enable_device_hook = pnv_pci_enable_device_hook,
- .window_alignment = pnv_pci_window_alignment,
- .reset_secondary_bus = pnv_pci_reset_secondary_bus,
- .dma_set_mask = pnv_npu_dma_set_mask,
- .shutdown = pnv_pci_ioda_shutdown,
+ .enable_device_hook = pnv_pci_enable_device_hook,
+ .window_alignment = pnv_pci_window_alignment,
+ .reset_secondary_bus = pnv_pci_reset_secondary_bus,
+ .dma_set_mask = pnv_npu_dma_set_mask,
+ .shutdown = pnv_pci_ioda_shutdown,
};
static void __init pnv_pci_init_ioda_phb(struct device_node *np,
@@ -3242,10 +3415,12 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
{
struct pci_controller *hose;
struct pnv_phb *phb;
- unsigned long size, m32map_off, pemap_off, iomap_off = 0;
+ unsigned long size, m64map_off, m32map_off, pemap_off;
+ unsigned long iomap_off = 0, dma32map_off = 0;
const __be64 *prop64;
const __be32 *prop32;
int len;
+ unsigned int segno;
u64 phb_id;
void *aux;
long rc;
@@ -3306,13 +3481,13 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
pr_err(" Failed to map registers !\n");
/* Initialize more IODA stuff */
- phb->ioda.total_pe = 1;
+ phb->ioda.total_pe_num = 1;
prop32 = of_get_property(np, "ibm,opal-num-pes", NULL);
if (prop32)
- phb->ioda.total_pe = be32_to_cpup(prop32);
+ phb->ioda.total_pe_num = be32_to_cpup(prop32);
prop32 = of_get_property(np, "ibm,opal-reserved-pe", NULL);
if (prop32)
- phb->ioda.reserved_pe = be32_to_cpup(prop32);
+ phb->ioda.reserved_pe_idx = be32_to_cpup(prop32);
/* Parse 64-bit MMIO range */
pnv_ioda_parse_m64_window(phb);
@@ -3321,36 +3496,58 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
/* FW Has already off top 64k of M32 space (MSI space) */
phb->ioda.m32_size += 0x10000;
- phb->ioda.m32_segsize = phb->ioda.m32_size / phb->ioda.total_pe;
+ phb->ioda.m32_segsize = phb->ioda.m32_size / phb->ioda.total_pe_num;
phb->ioda.m32_pci_base = hose->mem_resources[0].start - hose->mem_offset[0];
phb->ioda.io_size = hose->pci_io_size;
- phb->ioda.io_segsize = phb->ioda.io_size / phb->ioda.total_pe;
+ phb->ioda.io_segsize = phb->ioda.io_size / phb->ioda.total_pe_num;
phb->ioda.io_pci_base = 0; /* XXX calculate this ? */
+ /* Calculate how many 32-bit TCE segments we have */
+ phb->ioda.dma32_count = phb->ioda.m32_pci_base /
+ PNV_IODA1_DMA32_SEGSIZE;
+
/* Allocate aux data & arrays. We don't have IO ports on PHB3 */
- size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long));
+ size = _ALIGN_UP(max_t(unsigned, phb->ioda.total_pe_num, 8) / 8,
+ sizeof(unsigned long));
+ m64map_off = size;
+ size += phb->ioda.total_pe_num * sizeof(phb->ioda.m64_segmap[0]);
m32map_off = size;
- size += phb->ioda.total_pe * sizeof(phb->ioda.m32_segmap[0]);
+ size += phb->ioda.total_pe_num * sizeof(phb->ioda.m32_segmap[0]);
if (phb->type == PNV_PHB_IODA1) {
iomap_off = size;
- size += phb->ioda.total_pe * sizeof(phb->ioda.io_segmap[0]);
+ size += phb->ioda.total_pe_num * sizeof(phb->ioda.io_segmap[0]);
+ dma32map_off = size;
+ size += phb->ioda.dma32_count *
+ sizeof(phb->ioda.dma32_segmap[0]);
}
pemap_off = size;
- size += phb->ioda.total_pe * sizeof(struct pnv_ioda_pe);
+ size += phb->ioda.total_pe_num * sizeof(struct pnv_ioda_pe);
aux = memblock_virt_alloc(size, 0);
phb->ioda.pe_alloc = aux;
+ phb->ioda.m64_segmap = aux + m64map_off;
phb->ioda.m32_segmap = aux + m32map_off;
- if (phb->type == PNV_PHB_IODA1)
+ for (segno = 0; segno < phb->ioda.total_pe_num; segno++) {
+ phb->ioda.m64_segmap[segno] = IODA_INVALID_PE;
+ phb->ioda.m32_segmap[segno] = IODA_INVALID_PE;
+ }
+ if (phb->type == PNV_PHB_IODA1) {
phb->ioda.io_segmap = aux + iomap_off;
+ for (segno = 0; segno < phb->ioda.total_pe_num; segno++)
+ phb->ioda.io_segmap[segno] = IODA_INVALID_PE;
+
+ phb->ioda.dma32_segmap = aux + dma32map_off;
+ for (segno = 0; segno < phb->ioda.dma32_count; segno++)
+ phb->ioda.dma32_segmap[segno] = IODA_INVALID_PE;
+ }
phb->ioda.pe_array = aux + pemap_off;
- set_bit(phb->ioda.reserved_pe, phb->ioda.pe_alloc);
+ set_bit(phb->ioda.reserved_pe_idx, phb->ioda.pe_alloc);
- INIT_LIST_HEAD(&phb->ioda.pe_dma_list);
INIT_LIST_HEAD(&phb->ioda.pe_list);
mutex_init(&phb->ioda.pe_list_mutex);
/* Calculate how many 32-bit TCE segments we have */
- phb->ioda.tce32_count = phb->ioda.m32_pci_base >> 28;
+ phb->ioda.dma32_count = phb->ioda.m32_pci_base /
+ PNV_IODA1_DMA32_SEGSIZE;
#if 0 /* We should really do that ... */
rc = opal_pci_set_phb_mem_window(opal->phb_id,
@@ -3362,7 +3559,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
#endif
pr_info(" %03d (%03d) PE's M32: 0x%x [segment=0x%x]\n",
- phb->ioda.total_pe, phb->ioda.reserved_pe,
+ phb->ioda.total_pe_num, phb->ioda.reserved_pe_idx,
phb->ioda.m32_size, phb->ioda.m32_segsize);
if (phb->ioda.m64_size)
pr_info(" M64: 0x%lx [segment=0x%lx]\n",
@@ -3377,12 +3574,6 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
phb->freeze_pe = pnv_ioda_freeze_pe;
phb->unfreeze_pe = pnv_ioda_unfreeze_pe;
- /* Setup RID -> PE mapping function */
- phb->bdfn_to_pe = pnv_ioda_bdfn_to_pe;
-
- /* Setup TCEs */
- phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup;
-
/* Setup MSI support */
pnv_pci_init_ioda_msis(phb);
@@ -3395,10 +3586,12 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
*/
ppc_md.pcibios_fixup = pnv_pci_ioda_fixup;
- if (phb->type == PNV_PHB_NPU)
+ if (phb->type == PNV_PHB_NPU) {
hose->controller_ops = pnv_npu_ioda_controller_ops;
- else
+ } else {
+ phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup;
hose->controller_ops = pnv_pci_ioda_controller_ops;
+ }
#ifdef CONFIG_PCI_IOV
ppc_md.pcibios_fixup_sriov = pnv_pci_ioda_fixup_iov_resources;
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 73c8dc2a3..1d92bd93b 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -39,9 +39,6 @@
/* Delay in usec */
#define PCI_RESET_DELAY_US 3000000
-#define cfg_dbg(fmt...) do { } while(0)
-//#define cfg_dbg(fmt...) printk(fmt)
-
#ifdef CONFIG_PCI_MSI
int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
{
@@ -370,7 +367,7 @@ static void pnv_pci_config_check_eeh(struct pci_dn *pdn)
struct pnv_phb *phb = pdn->phb->private_data;
u8 fstate;
__be16 pcierr;
- int pe_no;
+ unsigned int pe_no;
s64 rc;
/*
@@ -380,7 +377,7 @@ static void pnv_pci_config_check_eeh(struct pci_dn *pdn)
*/
pe_no = pdn->pe_number;
if (pe_no == IODA_INVALID_PE) {
- pe_no = phb->ioda.reserved_pe;
+ pe_no = phb->ioda.reserved_pe_idx;
}
/*
@@ -402,8 +399,8 @@ static void pnv_pci_config_check_eeh(struct pci_dn *pdn)
}
}
- cfg_dbg(" -> EEH check, bdfn=%04x PE#%d fstate=%x\n",
- (pdn->busno << 8) | (pdn->devfn), pe_no, fstate);
+ pr_devel(" -> EEH check, bdfn=%04x PE#%d fstate=%x\n",
+ (pdn->busno << 8) | (pdn->devfn), pe_no, fstate);
/* Clear the frozen state if applicable */
if (fstate == OPAL_EEH_STOPPED_MMIO_FREEZE ||
@@ -451,8 +448,8 @@ int pnv_pci_cfg_read(struct pci_dn *pdn,
return PCIBIOS_FUNC_NOT_SUPPORTED;
}
- cfg_dbg("%s: bus: %x devfn: %x +%x/%x -> %08x\n",
- __func__, pdn->busno, pdn->devfn, where, size, *val);
+ pr_devel("%s: bus: %x devfn: %x +%x/%x -> %08x\n",
+ __func__, pdn->busno, pdn->devfn, where, size, *val);
return PCIBIOS_SUCCESSFUL;
}
@@ -462,8 +459,8 @@ int pnv_pci_cfg_write(struct pci_dn *pdn,
struct pnv_phb *phb = pdn->phb->private_data;
u32 bdfn = (pdn->busno << 8) | pdn->devfn;
- cfg_dbg("%s: bus: %x devfn: %x +%x/%x -> %08x\n",
- pdn->busno, pdn->devfn, where, size, val);
+ pr_devel("%s: bus: %x devfn: %x +%x/%x -> %08x\n",
+ __func__, pdn->busno, pdn->devfn, where, size, val);
switch (size) {
case 1:
opal_pci_config_write_byte(phb->opal_id, bdfn, where, val);
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 3f814f382..7dee25e30 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -24,7 +24,6 @@ enum pnv_phb_model {
#define PNV_IODA_PE_MASTER (1 << 3) /* Master PE in compound case */
#define PNV_IODA_PE_SLAVE (1 << 4) /* Slave PE in compound case */
#define PNV_IODA_PE_VF (1 << 5) /* PE for one VF */
-#define PNV_IODA_PE_PEER (1 << 6) /* PE has peers */
/* Data associated with a PE, including IOMMU tracking etc.. */
struct pnv_phb;
@@ -32,9 +31,6 @@ struct pnv_ioda_pe {
unsigned long flags;
struct pnv_phb *phb;
-#define PNV_IODA_MAX_PEER_PES 8
- struct pnv_ioda_pe *peers[PNV_IODA_MAX_PEER_PES];
-
/* A PE can be associated with a single device or an
* entire bus (& children). In the former case, pdev
* is populated, in the later case, pbus is.
@@ -53,14 +49,7 @@ struct pnv_ioda_pe {
/* PE number */
unsigned int pe_number;
- /* "Weight" assigned to the PE for the sake of DMA resource
- * allocations
- */
- unsigned int dma_weight;
-
/* "Base" iommu table, ie, 4K TCEs, 32-bit DMA */
- int tce32_seg;
- int tce32_segcount;
struct iommu_table_group table_group;
/* 64-bit TCE bypass region */
@@ -78,7 +67,6 @@ struct pnv_ioda_pe {
struct list_head slaves;
/* Link in list of PE#s */
- struct list_head dma_link;
struct list_head list;
};
@@ -110,19 +98,18 @@ struct pnv_phb {
unsigned int is_64, struct msi_msg *msg);
void (*dma_dev_setup)(struct pnv_phb *phb, struct pci_dev *pdev);
void (*fixup_phb)(struct pci_controller *hose);
- u32 (*bdfn_to_pe)(struct pnv_phb *phb, struct pci_bus *bus, u32 devfn);
int (*init_m64)(struct pnv_phb *phb);
void (*reserve_m64_pe)(struct pci_bus *bus,
unsigned long *pe_bitmap, bool all);
- int (*pick_m64_pe)(struct pci_bus *bus, bool all);
+ struct pnv_ioda_pe *(*pick_m64_pe)(struct pci_bus *bus, bool all);
int (*get_pe_state)(struct pnv_phb *phb, int pe_no);
void (*freeze_pe)(struct pnv_phb *phb, int pe_no);
int (*unfreeze_pe)(struct pnv_phb *phb, int pe_no, int opt);
struct {
/* Global bridge info */
- unsigned int total_pe;
- unsigned int reserved_pe;
+ unsigned int total_pe_num;
+ unsigned int reserved_pe_idx;
/* 32-bit MMIO window */
unsigned int m32_size;
@@ -141,15 +128,19 @@ struct pnv_phb {
unsigned int io_segsize;
unsigned int io_pci_base;
- /* PE allocation bitmap */
- unsigned long *pe_alloc;
- /* PE allocation mutex */
+ /* PE allocation */
struct mutex pe_alloc_mutex;
+ unsigned long *pe_alloc;
+ struct pnv_ioda_pe *pe_array;
/* M32 & IO segment maps */
+ unsigned int *m64_segmap;
unsigned int *m32_segmap;
unsigned int *io_segmap;
- struct pnv_ioda_pe *pe_array;
+
+ /* DMA32 segment maps - IODA1 only */
+ unsigned int dma32_count;
+ unsigned int *dma32_segmap;
/* IRQ chip */
int irq_chip_init;
@@ -167,20 +158,6 @@ struct pnv_phb {
*/
unsigned char pe_rmap[0x10000];
- /* 32-bit TCE tables allocation */
- unsigned long tce32_count;
-
- /* Total "weight" for the sake of DMA resources
- * allocation
- */
- unsigned int dma_weight;
- unsigned int dma_pe_count;
-
- /* Sorted list of used PE's, sorted at
- * boot for resource allocation purposes
- */
- struct list_head pe_dma_list;
-
/* TCE cache invalidate registers (physical and
* remapped)
*/
@@ -236,16 +213,23 @@ extern void pnv_pci_dma_bus_setup(struct pci_bus *bus);
extern int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type);
extern void pnv_teardown_msi_irqs(struct pci_dev *pdev);
+extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
+ const char *fmt, ...);
+#define pe_err(pe, fmt, ...) \
+ pe_level_printk(pe, KERN_ERR, fmt, ##__VA_ARGS__)
+#define pe_warn(pe, fmt, ...) \
+ pe_level_printk(pe, KERN_WARNING, fmt, ##__VA_ARGS__)
+#define pe_info(pe, fmt, ...) \
+ pe_level_printk(pe, KERN_INFO, fmt, ##__VA_ARGS__)
+
/* Nvlink functions */
-extern void pnv_npu_tce_invalidate_entire(struct pnv_ioda_pe *npe);
-extern void pnv_npu_tce_invalidate(struct pnv_ioda_pe *npe,
- struct iommu_table *tbl,
- unsigned long index,
- unsigned long npages,
- bool rm);
-extern void pnv_npu_init_dma_pe(struct pnv_ioda_pe *npe);
-extern void pnv_npu_setup_dma_pe(struct pnv_ioda_pe *npe);
-extern int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe, bool enabled);
-extern int pnv_npu_dma_set_mask(struct pci_dev *npdev, u64 dma_mask);
+extern void pnv_npu_try_dma_set_bypass(struct pci_dev *gpdev, bool bypass);
+extern void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_phb *phb, bool rm);
+extern struct pnv_ioda_pe *pnv_pci_npu_setup_iommu(struct pnv_ioda_pe *npe);
+extern long pnv_npu_set_window(struct pnv_ioda_pe *npe, int num,
+ struct iommu_table *tbl);
+extern long pnv_npu_unset_window(struct pnv_ioda_pe *npe, int num);
+extern void pnv_npu_take_ownership(struct pnv_ioda_pe *npe);
+extern void pnv_npu_release_ownership(struct pnv_ioda_pe *npe);
#endif /* __POWERNV_PCI_H */
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 1acb0c72d..ee6430bed 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -273,7 +273,10 @@ static int __init pnv_probe(void)
if (!of_flat_dt_is_compatible(root, "ibm,powernv"))
return 0;
- hpte_init_native();
+ if (IS_ENABLED(CONFIG_PPC_RADIX_MMU) && radix_enabled())
+ radix_init_native();
+ else if (IS_ENABLED(CONFIG_PPC_STD_MMU_64))
+ hpte_init_native();
if (firmware_has_feature(FW_FEATURE_OPAL))
pnv_setup_machdep_opal();
diff --git a/arch/powerpc/platforms/ps3/htab.c b/arch/powerpc/platforms/ps3/htab.c
index 2f95d33cf..c9a3e6771 100644
--- a/arch/powerpc/platforms/ps3/htab.c
+++ b/arch/powerpc/platforms/ps3/htab.c
@@ -63,7 +63,7 @@ static long ps3_hpte_insert(unsigned long hpte_group, unsigned long vpn,
vflags &= ~HPTE_V_SECONDARY;
hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
- hpte_r = hpte_encode_r(ps3_mm_phys_to_lpar(pa), psize, apsize) | rflags;
+ hpte_r = hpte_encode_r(ps3_mm_phys_to_lpar(pa), psize, apsize, ssize) | rflags;
spin_lock_irqsave(&ps3_htab_lock, flags);
diff --git a/arch/powerpc/platforms/ps3/spu.c b/arch/powerpc/platforms/ps3/spu.c
index a0bca05e2..492b2575e 100644
--- a/arch/powerpc/platforms/ps3/spu.c
+++ b/arch/powerpc/platforms/ps3/spu.c
@@ -205,7 +205,7 @@ static void spu_unmap(struct spu *spu)
static int __init setup_areas(struct spu *spu)
{
struct table {char* name; unsigned long addr; unsigned long size;};
- static const unsigned long shadow_flags = _PAGE_NO_CACHE | 3;
+ unsigned long shadow_flags = pgprot_val(pgprot_noncached_wc(PAGE_KERNEL_RO));
spu_pdata(spu)->shadow = __ioremap(spu_pdata(spu)->shadow_addr,
sizeof(struct spe_shadow),
@@ -216,7 +216,7 @@ static int __init setup_areas(struct spu *spu)
}
spu->local_store = (__force void *)ioremap_prot(spu->local_store_phys,
- LS_SIZE, _PAGE_NO_CACHE);
+ LS_SIZE, pgprot_val(pgprot_noncached_wc(__pgprot(0))));
if (!spu->local_store) {
pr_debug("%s:%d: ioremap local_store failed\n",
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index 405baaf96..3998e0f9a 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -53,7 +53,6 @@ static int ibm_read_slot_reset_state2;
static int ibm_slot_error_detail;
static int ibm_get_config_addr_info;
static int ibm_get_config_addr_info2;
-static int ibm_configure_bridge;
static int ibm_configure_pe;
/*
@@ -81,7 +80,14 @@ static int pseries_eeh_init(void)
ibm_get_config_addr_info2 = rtas_token("ibm,get-config-addr-info2");
ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info");
ibm_configure_pe = rtas_token("ibm,configure-pe");
- ibm_configure_bridge = rtas_token("ibm,configure-bridge");
+
+ /*
+ * ibm,configure-pe and ibm,configure-bridge have the same semantics,
+ * however ibm,configure-pe can be faster. If we can't find
+ * ibm,configure-pe then fall back to using ibm,configure-bridge.
+ */
+ if (ibm_configure_pe == RTAS_UNKNOWN_SERVICE)
+ ibm_configure_pe = rtas_token("ibm,configure-bridge");
/*
* Necessary sanity check. We needn't check "get-config-addr-info"
@@ -93,8 +99,7 @@ static int pseries_eeh_init(void)
(ibm_read_slot_reset_state2 == RTAS_UNKNOWN_SERVICE &&
ibm_read_slot_reset_state == RTAS_UNKNOWN_SERVICE) ||
ibm_slot_error_detail == RTAS_UNKNOWN_SERVICE ||
- (ibm_configure_pe == RTAS_UNKNOWN_SERVICE &&
- ibm_configure_bridge == RTAS_UNKNOWN_SERVICE)) {
+ ibm_configure_pe == RTAS_UNKNOWN_SERVICE) {
pr_info("EEH functionality not supported\n");
return -EINVAL;
}
@@ -624,18 +629,9 @@ static int pseries_eeh_configure_bridge(struct eeh_pe *pe)
config_addr = pe->addr;
while (max_wait > 0) {
- /* Use new configure-pe function, if supported */
- if (ibm_configure_pe != RTAS_UNKNOWN_SERVICE) {
- ret = rtas_call(ibm_configure_pe, 3, 1, NULL,
- config_addr, BUID_HI(pe->phb->buid),
- BUID_LO(pe->phb->buid));
- } else if (ibm_configure_bridge != RTAS_UNKNOWN_SERVICE) {
- ret = rtas_call(ibm_configure_bridge, 3, 1, NULL,
- config_addr, BUID_HI(pe->phb->buid),
- BUID_LO(pe->phb->buid));
- } else {
- return -EFAULT;
- }
+ ret = rtas_call(ibm_configure_pe, 3, 1, NULL,
+ config_addr, BUID_HI(pe->phb->buid),
+ BUID_LO(pe->phb->buid));
if (!ret)
return ret;
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index e9ff44cd5..2ce138542 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -116,6 +116,155 @@ static struct property *dlpar_clone_drconf_property(struct device_node *dn)
return new_prop;
}
+static void dlpar_update_drconf_property(struct device_node *dn,
+ struct property *prop)
+{
+ struct of_drconf_cell *lmbs;
+ u32 num_lmbs, *p;
+ int i;
+
+ /* Convert the property back to BE */
+ p = prop->value;
+ num_lmbs = *p;
+ *p = cpu_to_be32(*p);
+ p++;
+
+ lmbs = (struct of_drconf_cell *)p;
+ for (i = 0; i < num_lmbs; i++) {
+ lmbs[i].base_addr = cpu_to_be64(lmbs[i].base_addr);
+ lmbs[i].drc_index = cpu_to_be32(lmbs[i].drc_index);
+ lmbs[i].flags = cpu_to_be32(lmbs[i].flags);
+ }
+
+ rtas_hp_event = true;
+ of_update_property(dn, prop);
+ rtas_hp_event = false;
+}
+
+static int dlpar_update_device_tree_lmb(struct of_drconf_cell *lmb)
+{
+ struct device_node *dn;
+ struct property *prop;
+ struct of_drconf_cell *lmbs;
+ u32 *p, num_lmbs;
+ int i;
+
+ dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+ if (!dn)
+ return -ENODEV;
+
+ prop = dlpar_clone_drconf_property(dn);
+ if (!prop) {
+ of_node_put(dn);
+ return -ENODEV;
+ }
+
+ p = prop->value;
+ num_lmbs = *p++;
+ lmbs = (struct of_drconf_cell *)p;
+
+ for (i = 0; i < num_lmbs; i++) {
+ if (lmbs[i].drc_index == lmb->drc_index) {
+ lmbs[i].flags = lmb->flags;
+ lmbs[i].aa_index = lmb->aa_index;
+
+ dlpar_update_drconf_property(dn, prop);
+ break;
+ }
+ }
+
+ of_node_put(dn);
+ return 0;
+}
+
+static u32 lookup_lmb_associativity_index(struct of_drconf_cell *lmb)
+{
+ struct device_node *parent, *lmb_node, *dr_node;
+ const u32 *lmb_assoc;
+ const u32 *assoc_arrays;
+ u32 aa_index;
+ int aa_arrays, aa_array_entries, aa_array_sz;
+ int i;
+
+ parent = of_find_node_by_path("/");
+ if (!parent)
+ return -ENODEV;
+
+ lmb_node = dlpar_configure_connector(cpu_to_be32(lmb->drc_index),
+ parent);
+ of_node_put(parent);
+ if (!lmb_node)
+ return -EINVAL;
+
+ lmb_assoc = of_get_property(lmb_node, "ibm,associativity", NULL);
+ if (!lmb_assoc) {
+ dlpar_free_cc_nodes(lmb_node);
+ return -ENODEV;
+ }
+
+ dr_node = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+ if (!dr_node) {
+ dlpar_free_cc_nodes(lmb_node);
+ return -ENODEV;
+ }
+
+ assoc_arrays = of_get_property(dr_node,
+ "ibm,associativity-lookup-arrays",
+ NULL);
+ of_node_put(dr_node);
+ if (!assoc_arrays) {
+ dlpar_free_cc_nodes(lmb_node);
+ return -ENODEV;
+ }
+
+ /* The ibm,associativity-lookup-arrays property is defined to be
+ * a 32-bit value specifying the number of associativity arrays
+ * followed by a 32-bitvalue specifying the number of entries per
+ * array, followed by the associativity arrays.
+ */
+ aa_arrays = be32_to_cpu(assoc_arrays[0]);
+ aa_array_entries = be32_to_cpu(assoc_arrays[1]);
+ aa_array_sz = aa_array_entries * sizeof(u32);
+
+ aa_index = -1;
+ for (i = 0; i < aa_arrays; i++) {
+ int indx = (i * aa_array_entries) + 2;
+
+ if (memcmp(&assoc_arrays[indx], &lmb_assoc[1], aa_array_sz))
+ continue;
+
+ aa_index = i;
+ break;
+ }
+
+ dlpar_free_cc_nodes(lmb_node);
+ return aa_index;
+}
+
+static int dlpar_add_device_tree_lmb(struct of_drconf_cell *lmb)
+{
+ int aa_index;
+
+ lmb->flags |= DRCONF_MEM_ASSIGNED;
+
+ aa_index = lookup_lmb_associativity_index(lmb);
+ if (aa_index < 0) {
+ pr_err("Couldn't find associativity index for drc index %x\n",
+ lmb->drc_index);
+ return aa_index;
+ }
+
+ lmb->aa_index = aa_index;
+ return dlpar_update_device_tree_lmb(lmb);
+}
+
+static int dlpar_remove_device_tree_lmb(struct of_drconf_cell *lmb)
+{
+ lmb->flags &= ~DRCONF_MEM_ASSIGNED;
+ lmb->aa_index = 0xffffffff;
+ return dlpar_update_device_tree_lmb(lmb);
+}
+
static struct memory_block *lmb_to_memblock(struct of_drconf_cell *lmb)
{
unsigned long section_nr;
@@ -243,8 +392,8 @@ static int dlpar_remove_lmb(struct of_drconf_cell *lmb)
memblock_remove(lmb->base_addr, block_sz);
dlpar_release_drc(lmb->drc_index);
+ dlpar_remove_device_tree_lmb(lmb);
- lmb->flags &= ~DRCONF_MEM_ASSIGNED;
return 0;
}
@@ -384,43 +533,32 @@ static int dlpar_memory_remove_by_index(u32 drc_index, struct property *prop)
#endif /* CONFIG_MEMORY_HOTREMOVE */
-static int dlpar_add_lmb(struct of_drconf_cell *lmb)
+static int dlpar_add_lmb_memory(struct of_drconf_cell *lmb)
{
struct memory_block *mem_block;
unsigned long block_sz;
int nid, rc;
- if (lmb->flags & DRCONF_MEM_ASSIGNED)
- return -EINVAL;
-
block_sz = memory_block_size_bytes();
- rc = dlpar_acquire_drc(lmb->drc_index);
- if (rc)
- return rc;
-
/* Find the node id for this address */
nid = memory_add_physaddr_to_nid(lmb->base_addr);
/* Add the memory */
rc = add_memory(nid, lmb->base_addr, block_sz);
- if (rc) {
- dlpar_release_drc(lmb->drc_index);
+ if (rc)
return rc;
- }
/* Register this block of memory */
rc = memblock_add(lmb->base_addr, block_sz);
if (rc) {
remove_memory(nid, lmb->base_addr, block_sz);
- dlpar_release_drc(lmb->drc_index);
return rc;
}
mem_block = lmb_to_memblock(lmb);
if (!mem_block) {
remove_memory(nid, lmb->base_addr, block_sz);
- dlpar_release_drc(lmb->drc_index);
return -EINVAL;
}
@@ -428,7 +566,6 @@ static int dlpar_add_lmb(struct of_drconf_cell *lmb)
put_device(&mem_block->dev);
if (rc) {
remove_memory(nid, lmb->base_addr, block_sz);
- dlpar_release_drc(lmb->drc_index);
return rc;
}
@@ -436,6 +573,34 @@ static int dlpar_add_lmb(struct of_drconf_cell *lmb)
return 0;
}
+static int dlpar_add_lmb(struct of_drconf_cell *lmb)
+{
+ int rc;
+
+ if (lmb->flags & DRCONF_MEM_ASSIGNED)
+ return -EINVAL;
+
+ rc = dlpar_acquire_drc(lmb->drc_index);
+ if (rc)
+ return rc;
+
+ rc = dlpar_add_device_tree_lmb(lmb);
+ if (rc) {
+ pr_err("Couldn't update device tree for drc index %x\n",
+ lmb->drc_index);
+ dlpar_release_drc(lmb->drc_index);
+ return rc;
+ }
+
+ rc = dlpar_add_lmb_memory(lmb);
+ if (rc) {
+ dlpar_remove_device_tree_lmb(lmb);
+ dlpar_release_drc(lmb->drc_index);
+ }
+
+ return rc;
+}
+
static int dlpar_memory_add_by_count(u32 lmbs_to_add, struct property *prop)
{
struct of_drconf_cell *lmbs;
@@ -536,31 +701,6 @@ static int dlpar_memory_add_by_index(u32 drc_index, struct property *prop)
return rc;
}
-static void dlpar_update_drconf_property(struct device_node *dn,
- struct property *prop)
-{
- struct of_drconf_cell *lmbs;
- u32 num_lmbs, *p;
- int i;
-
- /* Convert the property back to BE */
- p = prop->value;
- num_lmbs = *p;
- *p = cpu_to_be32(*p);
- p++;
-
- lmbs = (struct of_drconf_cell *)p;
- for (i = 0; i < num_lmbs; i++) {
- lmbs[i].base_addr = cpu_to_be64(lmbs[i].base_addr);
- lmbs[i].drc_index = cpu_to_be32(lmbs[i].drc_index);
- lmbs[i].flags = cpu_to_be32(lmbs[i].flags);
- }
-
- rtas_hp_event = true;
- of_update_property(dn, prop);
- rtas_hp_event = false;
-}
-
int dlpar_memory(struct pseries_hp_errorlog *hp_elog)
{
struct device_node *dn;
@@ -608,10 +748,7 @@ int dlpar_memory(struct pseries_hp_errorlog *hp_elog)
break;
}
- if (rc)
- dlpar_free_drconf_property(prop);
- else
- dlpar_update_drconf_property(dn, prop);
+ dlpar_free_drconf_property(prop);
dlpar_memory_out:
of_node_put(dn);
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 2415a0d31..7f6100d91 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -89,18 +89,21 @@ void vpa_init(int cpu)
"%lx failed with %ld\n", cpu, hwcpu, addr, ret);
return;
}
+
+#ifdef CONFIG_PPC_STD_MMU_64
/*
* PAPR says this feature is SLB-Buffer but firmware never
* reports that. All SPLPAR support SLB shadow buffer.
*/
- addr = __pa(paca[cpu].slb_shadow_ptr);
- if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
+ if (!radix_enabled() && firmware_has_feature(FW_FEATURE_SPLPAR)) {
+ addr = __pa(paca[cpu].slb_shadow_ptr);
ret = register_slb_shadow(hwcpu, addr);
if (ret)
pr_err("WARNING: SLB shadow buffer registration for "
"cpu %d (hw %d) of area %lx failed with %ld\n",
cpu, hwcpu, addr, ret);
}
+#endif /* CONFIG_PPC_STD_MMU_64 */
/*
* Register dispatch trace log, if one has been allocated.
@@ -123,6 +126,8 @@ void vpa_init(int cpu)
}
}
+#ifdef CONFIG_PPC_STD_MMU_64
+
static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
unsigned long vpn, unsigned long pa,
unsigned long rflags, unsigned long vflags,
@@ -139,7 +144,7 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
hpte_group, vpn, pa, rflags, vflags, psize);
hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
- hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;
+ hpte_r = hpte_encode_r(pa, psize, apsize, ssize) | rflags;
if (!(vflags & HPTE_V_BOLTED))
pr_devel(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r);
@@ -152,10 +157,6 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
/* Exact = 0 */
flags = 0;
- /* Make pHyp happy */
- if ((rflags & _PAGE_NO_CACHE) && !(rflags & _PAGE_WRITETHRU))
- hpte_r &= ~HPTE_R_M;
-
if (firmware_has_feature(FW_FEATURE_XCMO) && !(hpte_r & HPTE_R_N))
flags |= H_COALESCE_CAND;
@@ -659,6 +660,8 @@ static void pSeries_set_page_state(struct page *page, int order,
void arch_free_page(struct page *page, int order)
{
+ if (radix_enabled())
+ return;
if (!cmo_free_hint_flag || !firmware_has_feature(FW_FEATURE_CMO))
return;
@@ -666,7 +669,8 @@ void arch_free_page(struct page *page, int order)
}
EXPORT_SYMBOL(arch_free_page);
-#endif
+#endif /* CONFIG_PPC_SMLPAR */
+#endif /* CONFIG_PPC_STD_MMU_64 */
#ifdef CONFIG_TRACEPOINTS
#ifdef HAVE_JUMP_LABEL
diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c
index c9fecf09b..afa05a2cb 100644
--- a/arch/powerpc/platforms/pseries/lparcfg.c
+++ b/arch/powerpc/platforms/pseries/lparcfg.c
@@ -484,8 +484,9 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
seq_printf(m, "shared_processor_mode=%d\n",
lppaca_shared_proc(get_lppaca()));
+#ifdef CONFIG_PPC_STD_MMU_64
seq_printf(m, "slb_size=%d\n", mmu_slb_size);
-
+#endif
parse_em_data(m);
return 0;
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index ceb18d349..a560a98bc 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -191,8 +191,8 @@ static int update_dt_node(__be32 phandle, s32 scope)
break;
case 0x80000000:
- prop = of_find_property(dn, prop_name, NULL);
- of_remove_property(dn, prop);
+ of_remove_property(dn, of_find_property(dn,
+ prop_name, NULL));
prop = NULL;
break;
diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c
index 272e9ec1a..543a6386f 100644
--- a/arch/powerpc/platforms/pseries/msi.c
+++ b/arch/powerpc/platforms/pseries/msi.c
@@ -305,7 +305,7 @@ static int msi_quota_for_device(struct pci_dev *dev, int request)
memset(&counts, 0, sizeof(struct msi_counts));
/* Work out how many devices we have below this PE */
- traverse_pci_devices(pe_dn, count_non_bridge_devices, &counts);
+ pci_traverse_device_nodes(pe_dn, count_non_bridge_devices, &counts);
if (counts.num_devices == 0) {
pr_err("rtas_msi: found 0 devices under PE for %s\n",
@@ -320,7 +320,7 @@ static int msi_quota_for_device(struct pci_dev *dev, int request)
/* else, we have some more calculating to do */
counts.requestor = pci_device_to_OF_node(dev);
counts.request = request;
- traverse_pci_devices(pe_dn, count_spare_msis, &counts);
+ pci_traverse_device_nodes(pe_dn, count_spare_msis, &counts);
/* If the quota isn't an integer multiple of the total, we can
* use the remainder as spare MSIs for anyone that wants them. */
diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c
index 5d4a3df59..906dbaa97 100644
--- a/arch/powerpc/platforms/pseries/pci_dlpar.c
+++ b/arch/powerpc/platforms/pseries/pci_dlpar.c
@@ -34,38 +34,6 @@
#include "pseries.h"
-static struct pci_bus *
-find_bus_among_children(struct pci_bus *bus,
- struct device_node *dn)
-{
- struct pci_bus *child = NULL;
- struct pci_bus *tmp;
- struct device_node *busdn;
-
- busdn = pci_bus_to_OF_node(bus);
- if (busdn == dn)
- return bus;
-
- list_for_each_entry(tmp, &bus->children, node) {
- child = find_bus_among_children(tmp, dn);
- if (child)
- break;
- };
- return child;
-}
-
-struct pci_bus *
-pcibios_find_pci_bus(struct device_node *dn)
-{
- struct pci_dn *pdn = dn->data;
-
- if (!pdn || !pdn->phb || !pdn->phb->bus)
- return NULL;
-
- return find_bus_among_children(pdn->phb->bus, dn);
-}
-EXPORT_SYMBOL_GPL(pcibios_find_pci_bus);
-
struct pci_controller *init_phb_dynamic(struct device_node *dn)
{
struct pci_controller *phb;
diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c
index 7c7fcc042..cc66c49f0 100644
--- a/arch/powerpc/platforms/pseries/reconfig.c
+++ b/arch/powerpc/platforms/pseries/reconfig.c
@@ -303,7 +303,6 @@ static int do_remove_property(char *buf, size_t bufsize)
{
struct device_node *np;
char *tmp;
- struct property *prop;
buf = parse_node(buf, bufsize, &np);
if (!np)
@@ -316,9 +315,7 @@ static int do_remove_property(char *buf, size_t bufsize)
if (strlen(buf) == 0)
return -EINVAL;
- prop = of_find_property(np, buf, NULL);
-
- return of_remove_property(np, prop);
+ return of_remove_property(np, of_find_property(np, buf, NULL));
}
static int do_update_property(char *buf, size_t bufsize)
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 6e944fc6e..9883bc7ea 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -235,6 +235,8 @@ static void __init pseries_discover_pic(void)
for_each_node_by_name(np, "interrupt-controller") {
typep = of_get_property(np, "compatible", NULL);
+ if (!typep)
+ continue;
if (strstr(typep, "open-pic")) {
pSeries_mpic_node = of_node_get(np);
ppc_md.init_IRQ = pseries_mpic_init_IRQ;
@@ -265,7 +267,7 @@ static int pci_dn_reconfig_notifier(struct notifier_block *nb, unsigned long act
pdn = parent ? PCI_DN(parent) : NULL;
if (pdn) {
/* Create pdn and EEH device */
- update_dn_pci_info(np, pdn->phb);
+ pci_add_device_node_info(pdn->phb, np);
eeh_dev_init(PCI_DN(np), pdn->phb);
}