summaryrefslogtreecommitdiff
path: root/arch/powerpc/platforms/powernv
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/platforms/powernv')
-rw-r--r--arch/powerpc/platforms/powernv/Makefile2
-rw-r--r--arch/powerpc/platforms/powernv/eeh-powernv.c298
-rw-r--r--arch/powerpc/platforms/powernv/idle.c6
-rw-r--r--arch/powerpc/platforms/powernv/npu-dma.c2
-rw-r--r--arch/powerpc/platforms/powernv/opal-msglog.c34
-rw-r--r--arch/powerpc/platforms/powernv/opal.c7
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c347
-rw-r--r--arch/powerpc/platforms/powernv/pci-p5ioc2.c271
-rw-r--r--arch/powerpc/platforms/powernv/pci.c17
-rw-r--r--arch/powerpc/platforms/powernv/pci.h152
-rw-r--r--arch/powerpc/platforms/powernv/subcore.c2
11 files changed, 553 insertions, 585 deletions
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index f1516b5ec..cd9711e72 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -5,7 +5,7 @@ obj-y += opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o
obj-y += opal-kmsg.o
obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o
-obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o pci-ioda.o npu-dma.o
+obj-$(CONFIG_PCI) += pci.o pci-ioda.o npu-dma.o
obj-$(CONFIG_EEH) += eeh-powernv.o
obj-$(CONFIG_PPC_SCOM) += opal-xscom.o
obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 87f47e55a..950b3e539 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -167,42 +167,26 @@ static int pnv_eeh_dbgfs_get(void *data, int offset, u64 *val)
return 0;
}
-static int pnv_eeh_outb_dbgfs_set(void *data, u64 val)
-{
- return pnv_eeh_dbgfs_set(data, 0xD10, val);
-}
-
-static int pnv_eeh_outb_dbgfs_get(void *data, u64 *val)
-{
- return pnv_eeh_dbgfs_get(data, 0xD10, val);
-}
-
-static int pnv_eeh_inbA_dbgfs_set(void *data, u64 val)
-{
- return pnv_eeh_dbgfs_set(data, 0xD90, val);
-}
-
-static int pnv_eeh_inbA_dbgfs_get(void *data, u64 *val)
-{
- return pnv_eeh_dbgfs_get(data, 0xD90, val);
-}
-
-static int pnv_eeh_inbB_dbgfs_set(void *data, u64 val)
-{
- return pnv_eeh_dbgfs_set(data, 0xE10, val);
-}
+#define PNV_EEH_DBGFS_ENTRY(name, reg) \
+static int pnv_eeh_dbgfs_set_##name(void *data, u64 val) \
+{ \
+ return pnv_eeh_dbgfs_set(data, reg, val); \
+} \
+ \
+static int pnv_eeh_dbgfs_get_##name(void *data, u64 *val) \
+{ \
+ return pnv_eeh_dbgfs_get(data, reg, val); \
+} \
+ \
+DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_dbgfs_ops_##name, \
+ pnv_eeh_dbgfs_get_##name, \
+ pnv_eeh_dbgfs_set_##name, \
+ "0x%llx\n")
+
+PNV_EEH_DBGFS_ENTRY(outb, 0xD10);
+PNV_EEH_DBGFS_ENTRY(inbA, 0xD90);
+PNV_EEH_DBGFS_ENTRY(inbB, 0xE10);
-static int pnv_eeh_inbB_dbgfs_get(void *data, u64 *val)
-{
- return pnv_eeh_dbgfs_get(data, 0xE10, val);
-}
-
-DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_outb_dbgfs_ops, pnv_eeh_outb_dbgfs_get,
- pnv_eeh_outb_dbgfs_set, "0x%llx\n");
-DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_inbA_dbgfs_ops, pnv_eeh_inbA_dbgfs_get,
- pnv_eeh_inbA_dbgfs_set, "0x%llx\n");
-DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_inbB_dbgfs_ops, pnv_eeh_inbB_dbgfs_get,
- pnv_eeh_inbB_dbgfs_set, "0x%llx\n");
#endif /* CONFIG_DEBUG_FS */
/**
@@ -268,13 +252,13 @@ static int pnv_eeh_post_init(void)
debugfs_create_file("err_injct_outbound", 0600,
phb->dbgfs, hose,
- &pnv_eeh_outb_dbgfs_ops);
+ &pnv_eeh_dbgfs_ops_outb);
debugfs_create_file("err_injct_inboundA", 0600,
phb->dbgfs, hose,
- &pnv_eeh_inbA_dbgfs_ops);
+ &pnv_eeh_dbgfs_ops_inbA);
debugfs_create_file("err_injct_inboundB", 0600,
phb->dbgfs, hose,
- &pnv_eeh_inbB_dbgfs_ops);
+ &pnv_eeh_dbgfs_ops_inbB);
#endif /* CONFIG_DEBUG_FS */
}
@@ -387,6 +371,7 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data)
edev->mode &= 0xFFFFFF00;
edev->pcix_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_PCIX);
edev->pcie_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_EXP);
+ edev->af_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_AF);
edev->aer_cap = pnv_eeh_find_ecap(pdn, PCI_EXT_CAP_ID_ERR);
if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) {
edev->mode |= EEH_DEV_BRIDGE;
@@ -895,6 +880,120 @@ void pnv_pci_reset_secondary_bus(struct pci_dev *dev)
}
}
+static void pnv_eeh_wait_for_pending(struct pci_dn *pdn, const char *type,
+ int pos, u16 mask)
+{
+ struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+ int i, status = 0;
+
+ /* Wait for Transaction Pending bit to be cleared */
+ for (i = 0; i < 4; i++) {
+ eeh_ops->read_config(pdn, pos, 2, &status);
+ if (!(status & mask))
+ return;
+
+ msleep((1 << i) * 100);
+ }
+
+ pr_warn("%s: Pending transaction while issuing %sFLR to %04x:%02x:%02x.%01x\n",
+ __func__, type,
+ edev->phb->global_number, pdn->busno,
+ PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
+}
+
+static int pnv_eeh_do_flr(struct pci_dn *pdn, int option)
+{
+ struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+ u32 reg = 0;
+
+ if (WARN_ON(!edev->pcie_cap))
+ return -ENOTTY;
+
+ eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP, 4, &reg);
+ if (!(reg & PCI_EXP_DEVCAP_FLR))
+ return -ENOTTY;
+
+ switch (option) {
+ case EEH_RESET_HOT:
+ case EEH_RESET_FUNDAMENTAL:
+ pnv_eeh_wait_for_pending(pdn, "",
+ edev->pcie_cap + PCI_EXP_DEVSTA,
+ PCI_EXP_DEVSTA_TRPND);
+ eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
+ 4, &reg);
+ reg |= PCI_EXP_DEVCTL_BCR_FLR;
+ eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
+ 4, reg);
+ msleep(EEH_PE_RST_HOLD_TIME);
+ break;
+ case EEH_RESET_DEACTIVATE:
+ eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
+ 4, &reg);
+ reg &= ~PCI_EXP_DEVCTL_BCR_FLR;
+ eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
+ 4, reg);
+ msleep(EEH_PE_RST_SETTLE_TIME);
+ break;
+ }
+
+ return 0;
+}
+
+static int pnv_eeh_do_af_flr(struct pci_dn *pdn, int option)
+{
+ struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+ u32 cap = 0;
+
+ if (WARN_ON(!edev->af_cap))
+ return -ENOTTY;
+
+ eeh_ops->read_config(pdn, edev->af_cap + PCI_AF_CAP, 1, &cap);
+ if (!(cap & PCI_AF_CAP_TP) || !(cap & PCI_AF_CAP_FLR))
+ return -ENOTTY;
+
+ switch (option) {
+ case EEH_RESET_HOT:
+ case EEH_RESET_FUNDAMENTAL:
+ /*
+ * Wait for Transaction Pending bit to clear. A word-aligned
+ * test is used, so we use the conrol offset rather than status
+ * and shift the test bit to match.
+ */
+ pnv_eeh_wait_for_pending(pdn, "AF",
+ edev->af_cap + PCI_AF_CTRL,
+ PCI_AF_STATUS_TP << 8);
+ eeh_ops->write_config(pdn, edev->af_cap + PCI_AF_CTRL,
+ 1, PCI_AF_CTRL_FLR);
+ msleep(EEH_PE_RST_HOLD_TIME);
+ break;
+ case EEH_RESET_DEACTIVATE:
+ eeh_ops->write_config(pdn, edev->af_cap + PCI_AF_CTRL, 1, 0);
+ msleep(EEH_PE_RST_SETTLE_TIME);
+ break;
+ }
+
+ return 0;
+}
+
+static int pnv_eeh_reset_vf_pe(struct eeh_pe *pe, int option)
+{
+ struct eeh_dev *edev;
+ struct pci_dn *pdn;
+ int ret;
+
+ /* The VF PE should have only one child device */
+ edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, list);
+ pdn = eeh_dev_to_pdn(edev);
+ if (!pdn)
+ return -ENXIO;
+
+ ret = pnv_eeh_do_flr(pdn, option);
+ if (!ret)
+ return ret;
+
+ return pnv_eeh_do_af_flr(pdn, option);
+}
+
/**
* pnv_eeh_reset - Reset the specified PE
* @pe: EEH PE
@@ -956,7 +1055,9 @@ static int pnv_eeh_reset(struct eeh_pe *pe, int option)
}
bus = eeh_pe_bus_get(pe);
- if (pci_is_root_bus(bus) ||
+ if (pe->type & EEH_PE_VF)
+ ret = pnv_eeh_reset_vf_pe(pe, option);
+ else if (pci_is_root_bus(bus) ||
pci_is_root_bus(bus->parent))
ret = pnv_eeh_root_reset(hose, option);
else
@@ -1095,6 +1196,14 @@ static inline bool pnv_eeh_cfg_blocked(struct pci_dn *pdn)
if (!edev || !edev->pe)
return false;
+ /*
+ * We will issue FLR or AF FLR to all VFs, which are contained
+ * in VF PE. It relies on the EEH PCI config accessors. So we
+ * can't block them during the window.
+ */
+ if (edev->physfn && (edev->pe->state & EEH_PE_RESET))
+ return false;
+
if (edev->pe->state & EEH_PE_CFG_BLOCKED)
return true;
@@ -1479,6 +1588,65 @@ static int pnv_eeh_next_error(struct eeh_pe **pe)
return ret;
}
+static int pnv_eeh_restore_vf_config(struct pci_dn *pdn)
+{
+ struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+ u32 devctl, cmd, cap2, aer_capctl;
+ int old_mps;
+
+ if (edev->pcie_cap) {
+ /* Restore MPS */
+ old_mps = (ffs(pdn->mps) - 8) << 5;
+ eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
+ 2, &devctl);
+ devctl &= ~PCI_EXP_DEVCTL_PAYLOAD;
+ devctl |= old_mps;
+ eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
+ 2, devctl);
+
+ /* Disable Completion Timeout */
+ eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP2,
+ 4, &cap2);
+ if (cap2 & 0x10) {
+ eeh_ops->read_config(pdn,
+ edev->pcie_cap + PCI_EXP_DEVCTL2,
+ 4, &cap2);
+ cap2 |= 0x10;
+ eeh_ops->write_config(pdn,
+ edev->pcie_cap + PCI_EXP_DEVCTL2,
+ 4, cap2);
+ }
+ }
+
+ /* Enable SERR and parity checking */
+ eeh_ops->read_config(pdn, PCI_COMMAND, 2, &cmd);
+ cmd |= (PCI_COMMAND_PARITY | PCI_COMMAND_SERR);
+ eeh_ops->write_config(pdn, PCI_COMMAND, 2, cmd);
+
+ /* Enable report various errors */
+ if (edev->pcie_cap) {
+ eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
+ 2, &devctl);
+ devctl &= ~PCI_EXP_DEVCTL_CERE;
+ devctl |= (PCI_EXP_DEVCTL_NFERE |
+ PCI_EXP_DEVCTL_FERE |
+ PCI_EXP_DEVCTL_URRE);
+ eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
+ 2, devctl);
+ }
+
+ /* Enable ECRC generation and check */
+ if (edev->pcie_cap && edev->aer_cap) {
+ eeh_ops->read_config(pdn, edev->aer_cap + PCI_ERR_CAP,
+ 4, &aer_capctl);
+ aer_capctl |= (PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE);
+ eeh_ops->write_config(pdn, edev->aer_cap + PCI_ERR_CAP,
+ 4, aer_capctl);
+ }
+
+ return 0;
+}
+
static int pnv_eeh_restore_config(struct pci_dn *pdn)
{
struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
@@ -1488,9 +1656,21 @@ static int pnv_eeh_restore_config(struct pci_dn *pdn)
if (!edev)
return -EEXIST;
- phb = edev->phb->private_data;
- ret = opal_pci_reinit(phb->opal_id,
- OPAL_REINIT_PCI_DEV, edev->config_addr);
+ /*
+ * We have to restore the PCI config space after reset since the
+ * firmware can't see SRIOV VFs.
+ *
+ * FIXME: The MPS, error routing rules, timeout setting are worthy
+ * to be exported by firmware in extendible way.
+ */
+ if (edev->physfn) {
+ ret = pnv_eeh_restore_vf_config(pdn);
+ } else {
+ phb = edev->phb->private_data;
+ ret = opal_pci_reinit(phb->opal_id,
+ OPAL_REINIT_PCI_DEV, edev->config_addr);
+ }
+
if (ret) {
pr_warn("%s: Can't reinit PCI dev 0x%x (%lld)\n",
__func__, edev->config_addr, ret);
@@ -1519,6 +1699,40 @@ static struct eeh_ops pnv_eeh_ops = {
.restore_config = pnv_eeh_restore_config
};
+void pcibios_bus_add_device(struct pci_dev *pdev)
+{
+ struct pci_dn *pdn = pci_get_pdn(pdev);
+
+ if (!pdev->is_virtfn)
+ return;
+
+ /*
+ * The following operations will fail if VF's sysfs files
+ * aren't created or its resources aren't finalized.
+ */
+ eeh_add_device_early(pdn);
+ eeh_add_device_late(pdev);
+ eeh_sysfs_add_device(pdev);
+}
+
+#ifdef CONFIG_PCI_IOV
+static void pnv_pci_fixup_vf_mps(struct pci_dev *pdev)
+{
+ struct pci_dn *pdn = pci_get_pdn(pdev);
+ int parent_mps;
+
+ if (!pdev->is_virtfn)
+ return;
+
+ /* Synchronize MPS for VF and PF */
+ parent_mps = pcie_get_mps(pdev->physfn);
+ if ((128 << pdev->pcie_mpss) >= parent_mps)
+ pcie_set_mps(pdev, parent_mps);
+ pdn->mps = pcie_get_mps(pdev);
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pnv_pci_fixup_vf_mps);
+#endif /* CONFIG_PCI_IOV */
+
/**
* eeh_powernv_init - Register platform dependent EEH operations
*
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 15bfbcd5d..fcc8b6861 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -35,9 +35,9 @@ int pnv_save_sprs_for_winkle(void)
int rc;
/*
- * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric accross
+ * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric across
* all cpus at boot. Get these reg values of current cpu and use the
- * same accross all cpus.
+ * same across all cpus.
*/
uint64_t lpcr_val = mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1;
uint64_t hid0_val = mfspr(SPRN_HID0);
@@ -185,7 +185,7 @@ static ssize_t store_fastsleep_workaround_applyonce(struct device *dev,
* fastsleep workaround needs to be left in 'applied' state on all
* the cores. Do this by-
* 1. Patching out the call to 'undo' workaround in fastsleep exit path
- * 2. Sending ipi to all the cores which have atleast one online thread
+ * 2. Sending ipi to all the cores which have at least one online thread
* 3. Patching out the call to 'apply' workaround in fastsleep entry
* path
* There is no need to send ipi to cores which have all threads
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index e85aa900f..7229acd9b 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -278,7 +278,7 @@ static void pnv_npu_disable_bypass(struct pnv_ioda_pe *npe)
/*
* Enable/disable bypass mode on the NPU. The NPU only supports one
- * window per link, so bypass needs to be explicity enabled or
+ * window per link, so bypass needs to be explicitly enabled or
* disabled. Unlike for a PHB3 bypass and non-bypass modes can't be
* active at the same time.
*/
diff --git a/arch/powerpc/platforms/powernv/opal-msglog.c b/arch/powerpc/platforms/powernv/opal-msglog.c
index 44ed78af1..39d6ff9e5 100644
--- a/arch/powerpc/platforms/powernv/opal-msglog.c
+++ b/arch/powerpc/platforms/powernv/opal-msglog.c
@@ -31,26 +31,25 @@ struct memcons {
__be32 in_cons;
};
-static ssize_t opal_msglog_read(struct file *file, struct kobject *kobj,
- struct bin_attribute *bin_attr, char *to,
- loff_t pos, size_t count)
+static struct memcons *opal_memcons = NULL;
+
+ssize_t opal_msglog_copy(char *to, loff_t pos, size_t count)
{
- struct memcons *mc = bin_attr->private;
const char *conbuf;
ssize_t ret;
size_t first_read = 0;
uint32_t out_pos, avail;
- if (!mc)
+ if (!opal_memcons)
return -ENODEV;
- out_pos = be32_to_cpu(ACCESS_ONCE(mc->out_pos));
+ out_pos = be32_to_cpu(ACCESS_ONCE(opal_memcons->out_pos));
/* Now we've read out_pos, put a barrier in before reading the new
* data it points to in conbuf. */
smp_rmb();
- conbuf = phys_to_virt(be64_to_cpu(mc->obuf_phys));
+ conbuf = phys_to_virt(be64_to_cpu(opal_memcons->obuf_phys));
/* When the buffer has wrapped, read from the out_pos marker to the end
* of the buffer, and then read the remaining data as in the un-wrapped
@@ -58,7 +57,7 @@ static ssize_t opal_msglog_read(struct file *file, struct kobject *kobj,
if (out_pos & MEMCONS_OUT_POS_WRAP) {
out_pos &= MEMCONS_OUT_POS_MASK;
- avail = be32_to_cpu(mc->obuf_size) - out_pos;
+ avail = be32_to_cpu(opal_memcons->obuf_size) - out_pos;
ret = memory_read_from_buffer(to, count, &pos,
conbuf + out_pos, avail);
@@ -76,7 +75,7 @@ static ssize_t opal_msglog_read(struct file *file, struct kobject *kobj,
}
/* Sanity check. The firmware should not do this to us. */
- if (out_pos > be32_to_cpu(mc->obuf_size)) {
+ if (out_pos > be32_to_cpu(opal_memcons->obuf_size)) {
pr_err("OPAL: memory console corruption. Aborting read.\n");
return -EINVAL;
}
@@ -91,6 +90,13 @@ out:
return ret;
}
+static ssize_t opal_msglog_read(struct file *file, struct kobject *kobj,
+ struct bin_attribute *bin_attr, char *to,
+ loff_t pos, size_t count)
+{
+ return opal_msglog_copy(to, pos, count);
+}
+
static struct bin_attribute opal_msglog_attr = {
.attr = {.name = "msglog", .mode = 0444},
.read = opal_msglog_read
@@ -117,7 +123,15 @@ void __init opal_msglog_init(void)
return;
}
- opal_msglog_attr.private = mc;
+ opal_memcons = mc;
+}
+
+void __init opal_msglog_sysfs_init(void)
+{
+ if (!opal_memcons) {
+ pr_warn("OPAL: message log initialisation failed, not creating sysfs entry\n");
+ return;
+ }
if (sysfs_create_bin_file(opal_kobj, &opal_msglog_attr) != 0)
pr_warn("OPAL: sysfs file creation failed\n");
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 4e0da5af9..0256d0729 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -724,6 +724,9 @@ static int __init opal_init(void)
of_node_put(leds);
}
+ /* Initialise OPAL message log interface */
+ opal_msglog_init();
+
/* Create "opal" kobject under /sys/firmware */
rc = opal_sysfs_init();
if (rc == 0) {
@@ -739,8 +742,8 @@ static int __init opal_init(void)
opal_platform_dump_init();
/* Setup system parameters interface */
opal_sys_param_init();
- /* Setup message log interface. */
- opal_msglog_init();
+ /* Setup message log sysfs interface. */
+ opal_msglog_sysfs_init();
}
/* Initialize platform devices: IPMI backend, PRD & flash interface */
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index f90dc0439..c5baaf3cc 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -872,9 +872,6 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
if (!res->flags || !res->parent)
continue;
- if (!pnv_pci_is_mem_pref_64(res->flags))
- continue;
-
/*
* The actual IOV BAR range is determined by the start address
* and the actual size for num_vfs VFs BAR. This check is to
@@ -903,9 +900,6 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
if (!res->flags || !res->parent)
continue;
- if (!pnv_pci_is_mem_pref_64(res->flags))
- continue;
-
size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
res2 = *res;
res->start += size * offset;
@@ -1196,29 +1190,36 @@ static void pnv_pci_ioda_setup_PEs(void)
}
#ifdef CONFIG_PCI_IOV
-static int pnv_pci_vf_release_m64(struct pci_dev *pdev)
+static int pnv_pci_vf_release_m64(struct pci_dev *pdev, u16 num_vfs)
{
struct pci_bus *bus;
struct pci_controller *hose;
struct pnv_phb *phb;
struct pci_dn *pdn;
int i, j;
+ int m64_bars;
bus = pdev->bus;
hose = pci_bus_to_host(bus);
phb = hose->private_data;
pdn = pci_get_pdn(pdev);
+ if (pdn->m64_single_mode)
+ m64_bars = num_vfs;
+ else
+ m64_bars = 1;
+
for (i = 0; i < PCI_SRIOV_NUM_BARS; i++)
- for (j = 0; j < M64_PER_IOV; j++) {
- if (pdn->m64_wins[i][j] == IODA_INVALID_M64)
+ for (j = 0; j < m64_bars; j++) {
+ if (pdn->m64_map[j][i] == IODA_INVALID_M64)
continue;
opal_pci_phb_mmio_enable(phb->opal_id,
- OPAL_M64_WINDOW_TYPE, pdn->m64_wins[i][j], 0);
- clear_bit(pdn->m64_wins[i][j], &phb->ioda.m64_bar_alloc);
- pdn->m64_wins[i][j] = IODA_INVALID_M64;
+ OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 0);
+ clear_bit(pdn->m64_map[j][i], &phb->ioda.m64_bar_alloc);
+ pdn->m64_map[j][i] = IODA_INVALID_M64;
}
+ kfree(pdn->m64_map);
return 0;
}
@@ -1235,8 +1236,7 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
int total_vfs;
resource_size_t size, start;
int pe_num;
- int vf_groups;
- int vf_per_group;
+ int m64_bars;
bus = pdev->bus;
hose = pci_bus_to_host(bus);
@@ -1244,29 +1244,26 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
pdn = pci_get_pdn(pdev);
total_vfs = pci_sriov_get_totalvfs(pdev);
- /* Initialize the m64_wins to IODA_INVALID_M64 */
- for (i = 0; i < PCI_SRIOV_NUM_BARS; i++)
- for (j = 0; j < M64_PER_IOV; j++)
- pdn->m64_wins[i][j] = IODA_INVALID_M64;
+ if (pdn->m64_single_mode)
+ m64_bars = num_vfs;
+ else
+ m64_bars = 1;
+
+ pdn->m64_map = kmalloc(sizeof(*pdn->m64_map) * m64_bars, GFP_KERNEL);
+ if (!pdn->m64_map)
+ return -ENOMEM;
+ /* Initialize the m64_map to IODA_INVALID_M64 */
+ for (i = 0; i < m64_bars ; i++)
+ for (j = 0; j < PCI_SRIOV_NUM_BARS; j++)
+ pdn->m64_map[i][j] = IODA_INVALID_M64;
- if (pdn->m64_per_iov == M64_PER_IOV) {
- vf_groups = (num_vfs <= M64_PER_IOV) ? num_vfs: M64_PER_IOV;
- vf_per_group = (num_vfs <= M64_PER_IOV)? 1:
- roundup_pow_of_two(num_vfs) / pdn->m64_per_iov;
- } else {
- vf_groups = 1;
- vf_per_group = 1;
- }
for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
res = &pdev->resource[i + PCI_IOV_RESOURCES];
if (!res->flags || !res->parent)
continue;
- if (!pnv_pci_is_mem_pref_64(res->flags))
- continue;
-
- for (j = 0; j < vf_groups; j++) {
+ for (j = 0; j < m64_bars; j++) {
do {
win = find_next_zero_bit(&phb->ioda.m64_bar_alloc,
phb->ioda.m64_bar_idx + 1, 0);
@@ -1275,12 +1272,11 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
goto m64_failed;
} while (test_and_set_bit(win, &phb->ioda.m64_bar_alloc));
- pdn->m64_wins[i][j] = win;
+ pdn->m64_map[j][i] = win;
- if (pdn->m64_per_iov == M64_PER_IOV) {
+ if (pdn->m64_single_mode) {
size = pci_iov_resource_size(pdev,
PCI_IOV_RESOURCES + i);
- size = size * vf_per_group;
start = res->start + size * j;
} else {
size = resource_size(res);
@@ -1288,16 +1284,16 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
}
/* Map the M64 here */
- if (pdn->m64_per_iov == M64_PER_IOV) {
- pe_num = pdn->offset + j;
+ if (pdn->m64_single_mode) {
+ pe_num = pdn->pe_num_map[j];
rc = opal_pci_map_pe_mmio_window(phb->opal_id,
pe_num, OPAL_M64_WINDOW_TYPE,
- pdn->m64_wins[i][j], 0);
+ pdn->m64_map[j][i], 0);
}
rc = opal_pci_set_phb_mem_window(phb->opal_id,
OPAL_M64_WINDOW_TYPE,
- pdn->m64_wins[i][j],
+ pdn->m64_map[j][i],
start,
0, /* unused */
size);
@@ -1309,12 +1305,12 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
goto m64_failed;
}
- if (pdn->m64_per_iov == M64_PER_IOV)
+ if (pdn->m64_single_mode)
rc = opal_pci_phb_mmio_enable(phb->opal_id,
- OPAL_M64_WINDOW_TYPE, pdn->m64_wins[i][j], 2);
+ OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 2);
else
rc = opal_pci_phb_mmio_enable(phb->opal_id,
- OPAL_M64_WINDOW_TYPE, pdn->m64_wins[i][j], 1);
+ OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 1);
if (rc != OPAL_SUCCESS) {
dev_err(&pdev->dev, "Failed to enable M64 window #%d: %llx\n",
@@ -1326,7 +1322,7 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
return 0;
m64_failed:
- pnv_pci_vf_release_m64(pdev);
+ pnv_pci_vf_release_m64(pdev, num_vfs);
return -EBUSY;
}
@@ -1353,15 +1349,13 @@ static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe
iommu_free_table(tbl, of_node_full_name(dev->dev.of_node));
}
-static void pnv_ioda_release_vf_PE(struct pci_dev *pdev, u16 num_vfs)
+static void pnv_ioda_release_vf_PE(struct pci_dev *pdev)
{
struct pci_bus *bus;
struct pci_controller *hose;
struct pnv_phb *phb;
struct pnv_ioda_pe *pe, *pe_n;
struct pci_dn *pdn;
- u16 vf_index;
- int64_t rc;
bus = pdev->bus;
hose = pci_bus_to_host(bus);
@@ -1371,35 +1365,6 @@ static void pnv_ioda_release_vf_PE(struct pci_dev *pdev, u16 num_vfs)
if (!pdev->is_physfn)
return;
- if (pdn->m64_per_iov == M64_PER_IOV && num_vfs > M64_PER_IOV) {
- int vf_group;
- int vf_per_group;
- int vf_index1;
-
- vf_per_group = roundup_pow_of_two(num_vfs) / pdn->m64_per_iov;
-
- for (vf_group = 0; vf_group < M64_PER_IOV; vf_group++)
- for (vf_index = vf_group * vf_per_group;
- vf_index < (vf_group + 1) * vf_per_group &&
- vf_index < num_vfs;
- vf_index++)
- for (vf_index1 = vf_group * vf_per_group;
- vf_index1 < (vf_group + 1) * vf_per_group &&
- vf_index1 < num_vfs;
- vf_index1++){
-
- rc = opal_pci_set_peltv(phb->opal_id,
- pdn->offset + vf_index,
- pdn->offset + vf_index1,
- OPAL_REMOVE_PE_FROM_DOMAIN);
-
- if (rc)
- dev_warn(&pdev->dev, "%s: Failed to unlink same group PE#%d(%lld)\n",
- __func__,
- pdn->offset + vf_index1, rc);
- }
- }
-
list_for_each_entry_safe(pe, pe_n, &phb->ioda.pe_list, list) {
if (pe->parent_dev != pdev)
continue;
@@ -1424,7 +1389,7 @@ void pnv_pci_sriov_disable(struct pci_dev *pdev)
struct pnv_phb *phb;
struct pci_dn *pdn;
struct pci_sriov *iov;
- u16 num_vfs;
+ u16 num_vfs, i;
bus = pdev->bus;
hose = pci_bus_to_host(bus);
@@ -1434,18 +1399,25 @@ void pnv_pci_sriov_disable(struct pci_dev *pdev)
num_vfs = pdn->num_vfs;
/* Release VF PEs */
- pnv_ioda_release_vf_PE(pdev, num_vfs);
+ pnv_ioda_release_vf_PE(pdev);
if (phb->type == PNV_PHB_IODA2) {
- if (pdn->m64_per_iov == 1)
- pnv_pci_vf_resource_shift(pdev, -pdn->offset);
+ if (!pdn->m64_single_mode)
+ pnv_pci_vf_resource_shift(pdev, -*pdn->pe_num_map);
/* Release M64 windows */
- pnv_pci_vf_release_m64(pdev);
+ pnv_pci_vf_release_m64(pdev, num_vfs);
/* Release PE numbers */
- bitmap_clear(phb->ioda.pe_alloc, pdn->offset, num_vfs);
- pdn->offset = 0;
+ if (pdn->m64_single_mode) {
+ for (i = 0; i < num_vfs; i++) {
+ if (pdn->pe_num_map[i] != IODA_INVALID_PE)
+ pnv_ioda_free_pe(phb, pdn->pe_num_map[i]);
+ }
+ } else
+ bitmap_clear(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs);
+ /* Releasing pe_num_map */
+ kfree(pdn->pe_num_map);
}
}
@@ -1460,7 +1432,6 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
int pe_num;
u16 vf_index;
struct pci_dn *pdn;
- int64_t rc;
bus = pdev->bus;
hose = pci_bus_to_host(bus);
@@ -1472,7 +1443,10 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
/* Reserve PE for each VF */
for (vf_index = 0; vf_index < num_vfs; vf_index++) {
- pe_num = pdn->offset + vf_index;
+ if (pdn->m64_single_mode)
+ pe_num = pdn->pe_num_map[vf_index];
+ else
+ pe_num = *pdn->pe_num_map + vf_index;
pe = &phb->ioda.pe_array[pe_num];
pe->pe_number = pe_num;
@@ -1505,37 +1479,6 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
pnv_pci_ioda2_setup_dma_pe(phb, pe);
}
-
- if (pdn->m64_per_iov == M64_PER_IOV && num_vfs > M64_PER_IOV) {
- int vf_group;
- int vf_per_group;
- int vf_index1;
-
- vf_per_group = roundup_pow_of_two(num_vfs) / pdn->m64_per_iov;
-
- for (vf_group = 0; vf_group < M64_PER_IOV; vf_group++) {
- for (vf_index = vf_group * vf_per_group;
- vf_index < (vf_group + 1) * vf_per_group &&
- vf_index < num_vfs;
- vf_index++) {
- for (vf_index1 = vf_group * vf_per_group;
- vf_index1 < (vf_group + 1) * vf_per_group &&
- vf_index1 < num_vfs;
- vf_index1++) {
-
- rc = opal_pci_set_peltv(phb->opal_id,
- pdn->offset + vf_index,
- pdn->offset + vf_index1,
- OPAL_ADD_PE_TO_DOMAIN);
-
- if (rc)
- dev_warn(&pdev->dev, "%s: Failed to link same group PE#%d(%lld)\n",
- __func__,
- pdn->offset + vf_index1, rc);
- }
- }
- }
- }
}
int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
@@ -1545,6 +1488,7 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
struct pnv_phb *phb;
struct pci_dn *pdn;
int ret;
+ u16 i;
bus = pdev->bus;
hose = pci_bus_to_host(bus);
@@ -1552,20 +1496,59 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
pdn = pci_get_pdn(pdev);
if (phb->type == PNV_PHB_IODA2) {
+ if (!pdn->vfs_expanded) {
+ dev_info(&pdev->dev, "don't support this SRIOV device"
+ " with non 64bit-prefetchable IOV BAR\n");
+ return -ENOSPC;
+ }
+
+ /*
+ * When M64 BARs functions in Single PE mode, the number of VFs
+ * could be enabled must be less than the number of M64 BARs.
+ */
+ if (pdn->m64_single_mode && num_vfs > phb->ioda.m64_bar_idx) {
+ dev_info(&pdev->dev, "Not enough M64 BAR for VFs\n");
+ return -EBUSY;
+ }
+
+ /* Allocating pe_num_map */
+ if (pdn->m64_single_mode)
+ pdn->pe_num_map = kmalloc(sizeof(*pdn->pe_num_map) * num_vfs,
+ GFP_KERNEL);
+ else
+ pdn->pe_num_map = kmalloc(sizeof(*pdn->pe_num_map), GFP_KERNEL);
+
+ if (!pdn->pe_num_map)
+ return -ENOMEM;
+
+ if (pdn->m64_single_mode)
+ for (i = 0; i < num_vfs; i++)
+ pdn->pe_num_map[i] = IODA_INVALID_PE;
+
/* Calculate available PE for required VFs */
- mutex_lock(&phb->ioda.pe_alloc_mutex);
- pdn->offset = bitmap_find_next_zero_area(
- phb->ioda.pe_alloc, phb->ioda.total_pe,
- 0, num_vfs, 0);
- if (pdn->offset >= phb->ioda.total_pe) {
+ if (pdn->m64_single_mode) {
+ for (i = 0; i < num_vfs; i++) {
+ pdn->pe_num_map[i] = pnv_ioda_alloc_pe(phb);
+ if (pdn->pe_num_map[i] == IODA_INVALID_PE) {
+ ret = -EBUSY;
+ goto m64_failed;
+ }
+ }
+ } else {
+ mutex_lock(&phb->ioda.pe_alloc_mutex);
+ *pdn->pe_num_map = bitmap_find_next_zero_area(
+ phb->ioda.pe_alloc, phb->ioda.total_pe,
+ 0, num_vfs, 0);
+ if (*pdn->pe_num_map >= phb->ioda.total_pe) {
+ mutex_unlock(&phb->ioda.pe_alloc_mutex);
+ dev_info(&pdev->dev, "Failed to enable VF%d\n", num_vfs);
+ kfree(pdn->pe_num_map);
+ return -EBUSY;
+ }
+ bitmap_set(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs);
mutex_unlock(&phb->ioda.pe_alloc_mutex);
- dev_info(&pdev->dev, "Failed to enable VF%d\n", num_vfs);
- pdn->offset = 0;
- return -EBUSY;
}
- bitmap_set(phb->ioda.pe_alloc, pdn->offset, num_vfs);
pdn->num_vfs = num_vfs;
- mutex_unlock(&phb->ioda.pe_alloc_mutex);
/* Assign M64 window accordingly */
ret = pnv_pci_vf_assign_m64(pdev, num_vfs);
@@ -1579,8 +1562,8 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
* the IOV BAR according to the PE# allocated to the VFs.
* Otherwise, the PE# for the VF will conflict with others.
*/
- if (pdn->m64_per_iov == 1) {
- ret = pnv_pci_vf_resource_shift(pdev, pdn->offset);
+ if (!pdn->m64_single_mode) {
+ ret = pnv_pci_vf_resource_shift(pdev, *pdn->pe_num_map);
if (ret)
goto m64_failed;
}
@@ -1592,8 +1575,16 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
return 0;
m64_failed:
- bitmap_clear(phb->ioda.pe_alloc, pdn->offset, num_vfs);
- pdn->offset = 0;
+ if (pdn->m64_single_mode) {
+ for (i = 0; i < num_vfs; i++) {
+ if (pdn->pe_num_map[i] != IODA_INVALID_PE)
+ pnv_ioda_free_pe(phb, pdn->pe_num_map[i]);
+ }
+ } else
+ bitmap_clear(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs);
+
+ /* Releasing pe_num_map */
+ kfree(pdn->pe_num_map);
return ret;
}
@@ -1612,8 +1603,7 @@ int pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
/* Allocate PCI data */
add_dev_pci_data(pdev);
- pnv_pci_sriov_enable(pdev, num_vfs);
- return 0;
+ return pnv_pci_sriov_enable(pdev, num_vfs);
}
#endif /* CONFIG_PCI_IOV */
@@ -2851,45 +2841,58 @@ static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) { }
#ifdef CONFIG_PCI_IOV
static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
{
- struct pci_controller *hose;
- struct pnv_phb *phb;
+ struct pci_controller *hose = pci_bus_to_host(pdev->bus);
+ struct pnv_phb *phb = hose->private_data;
+ const resource_size_t gate = phb->ioda.m64_segsize >> 2;
struct resource *res;
int i;
- resource_size_t size;
+ resource_size_t size, total_vf_bar_sz;
struct pci_dn *pdn;
int mul, total_vfs;
if (!pdev->is_physfn || pdev->is_added)
return;
- hose = pci_bus_to_host(pdev->bus);
- phb = hose->private_data;
-
pdn = pci_get_pdn(pdev);
pdn->vfs_expanded = 0;
+ pdn->m64_single_mode = false;
total_vfs = pci_sriov_get_totalvfs(pdev);
- pdn->m64_per_iov = 1;
mul = phb->ioda.total_pe;
+ total_vf_bar_sz = 0;
for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
res = &pdev->resource[i + PCI_IOV_RESOURCES];
if (!res->flags || res->parent)
continue;
if (!pnv_pci_is_mem_pref_64(res->flags)) {
- dev_warn(&pdev->dev, " non M64 VF BAR%d: %pR\n",
+ dev_warn(&pdev->dev, "Don't support SR-IOV with"
+ " non M64 VF BAR%d: %pR. \n",
i, res);
- continue;
+ goto truncate_iov;
}
- size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
+ total_vf_bar_sz += pci_iov_resource_size(pdev,
+ i + PCI_IOV_RESOURCES);
- /* bigger than 64M */
- if (size > (1 << 26)) {
- dev_info(&pdev->dev, "PowerNV: VF BAR%d: %pR IOV size is bigger than 64M, roundup power2\n",
- i, res);
- pdn->m64_per_iov = M64_PER_IOV;
+ /*
+ * If bigger than quarter of M64 segment size, just round up
+ * power of two.
+ *
+ * Generally, one M64 BAR maps one IOV BAR. To avoid conflict
+ * with other devices, IOV BAR size is expanded to be
+ * (total_pe * VF_BAR_size). When VF_BAR_size is half of M64
+ * segment size , the expanded size would equal to half of the
+ * whole M64 space size, which will exhaust the M64 Space and
+ * limit the system flexibility. This is a design decision to
+ * set the boundary to quarter of the M64 segment size.
+ */
+ if (total_vf_bar_sz > gate) {
mul = roundup_pow_of_two(total_vfs);
+ dev_info(&pdev->dev,
+ "VF BAR Total IOV size %llx > %llx, roundup to %d VFs\n",
+ total_vf_bar_sz, gate, mul);
+ pdn->m64_single_mode = true;
break;
}
}
@@ -2898,20 +2901,31 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
res = &pdev->resource[i + PCI_IOV_RESOURCES];
if (!res->flags || res->parent)
continue;
- if (!pnv_pci_is_mem_pref_64(res->flags)) {
- dev_warn(&pdev->dev, "Skipping expanding VF BAR%d: %pR\n",
- i, res);
- continue;
- }
- dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res);
size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
+ /*
+ * On PHB3, the minimum size alignment of M64 BAR in single
+ * mode is 32MB.
+ */
+ if (pdn->m64_single_mode && (size < SZ_32M))
+ goto truncate_iov;
+ dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res);
res->end = res->start + size * mul - 1;
dev_dbg(&pdev->dev, " %pR\n", res);
dev_info(&pdev->dev, "VF BAR%d: %pR (expanded to %d VFs for PE alignment)",
i, res, mul);
}
pdn->vfs_expanded = mul;
+
+ return;
+
+truncate_iov:
+ /* To save MMIO space, IOV BAR is truncated. */
+ for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+ res = &pdev->resource[i + PCI_IOV_RESOURCES];
+ res->flags = 0;
+ res->end = res->start - 1;
+ }
}
#endif /* CONFIG_PCI_IOV */
@@ -3125,18 +3139,35 @@ static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus,
static resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev,
int resno)
{
+ struct pci_controller *hose = pci_bus_to_host(pdev->bus);
+ struct pnv_phb *phb = hose->private_data;
struct pci_dn *pdn = pci_get_pdn(pdev);
- resource_size_t align, iov_align;
-
- iov_align = resource_size(&pdev->resource[resno]);
- if (iov_align)
- return iov_align;
+ resource_size_t align;
+ /*
+ * On PowerNV platform, IOV BAR is mapped by M64 BAR to enable the
+ * SR-IOV. While from hardware perspective, the range mapped by M64
+ * BAR should be size aligned.
+ *
+ * When IOV BAR is mapped with M64 BAR in Single PE mode, the extra
+ * powernv-specific hardware restriction is gone. But if just use the
+ * VF BAR size as the alignment, PF BAR / VF BAR may be allocated with
+ * in one segment of M64 #15, which introduces the PE conflict between
+ * PF and VF. Based on this, the minimum alignment of an IOV BAR is
+ * m64_segsize.
+ *
+ * This function returns the total IOV BAR size if M64 BAR is in
+ * Shared PE mode or just VF BAR size if not.
+ * If the M64 BAR is in Single PE mode, return the VF BAR size or
+ * M64 segment size if IOV BAR size is less.
+ */
align = pci_iov_resource_size(pdev, resno);
- if (pdn->vfs_expanded)
- return pdn->vfs_expanded * align;
+ if (!pdn->vfs_expanded)
+ return align;
+ if (pdn->m64_single_mode)
+ return max(align, (resource_size_t)phb->ioda.m64_segsize);
- return align;
+ return pdn->vfs_expanded * align;
}
#endif /* CONFIG_PCI_IOV */
diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c
deleted file mode 100644
index f2bdfea3b..000000000
--- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c
+++ /dev/null
@@ -1,271 +0,0 @@
-/*
- * Support PCI/PCIe on PowerNV platforms
- *
- * Currently supports only P5IOC2
- *
- * Copyright 2011 Benjamin Herrenschmidt, IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/delay.h>
-#include <linux/string.h>
-#include <linux/init.h>
-#include <linux/bootmem.h>
-#include <linux/irq.h>
-#include <linux/io.h>
-#include <linux/msi.h>
-
-#include <asm/sections.h>
-#include <asm/io.h>
-#include <asm/prom.h>
-#include <asm/pci-bridge.h>
-#include <asm/machdep.h>
-#include <asm/msi_bitmap.h>
-#include <asm/ppc-pci.h>
-#include <asm/opal.h>
-#include <asm/iommu.h>
-#include <asm/tce.h>
-
-#include "powernv.h"
-#include "pci.h"
-
-/* For now, use a fixed amount of TCE memory for each p5ioc2
- * hub, 16M will do
- */
-#define P5IOC2_TCE_MEMORY 0x01000000
-
-#ifdef CONFIG_PCI_MSI
-static int pnv_pci_p5ioc2_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
- unsigned int hwirq, unsigned int virq,
- unsigned int is_64, struct msi_msg *msg)
-{
- if (WARN_ON(!is_64))
- return -ENXIO;
- msg->data = hwirq - phb->msi_base;
- msg->address_hi = 0x10000000;
- msg->address_lo = 0;
-
- return 0;
-}
-
-static void pnv_pci_init_p5ioc2_msis(struct pnv_phb *phb)
-{
- unsigned int count;
- const __be32 *prop = of_get_property(phb->hose->dn,
- "ibm,opal-msi-ranges", NULL);
- if (!prop)
- return;
-
- /* Don't do MSI's on p5ioc2 PCI-X are they are not properly
- * verified in HW
- */
- if (of_device_is_compatible(phb->hose->dn, "ibm,p5ioc2-pcix"))
- return;
- phb->msi_base = be32_to_cpup(prop);
- count = be32_to_cpup(prop + 1);
- if (msi_bitmap_alloc(&phb->msi_bmp, count, phb->hose->dn)) {
- pr_err("PCI %d: Failed to allocate MSI bitmap !\n",
- phb->hose->global_number);
- return;
- }
- phb->msi_setup = pnv_pci_p5ioc2_msi_setup;
- phb->msi32_support = 0;
- pr_info(" Allocated bitmap for %d MSIs (base IRQ 0x%x)\n",
- count, phb->msi_base);
-}
-#else
-static void pnv_pci_init_p5ioc2_msis(struct pnv_phb *phb) { }
-#endif /* CONFIG_PCI_MSI */
-
-static struct iommu_table_ops pnv_p5ioc2_iommu_ops = {
- .set = pnv_tce_build,
-#ifdef CONFIG_IOMMU_API
- .exchange = pnv_tce_xchg,
-#endif
- .clear = pnv_tce_free,
- .get = pnv_tce_get,
-};
-
-static void pnv_pci_p5ioc2_dma_dev_setup(struct pnv_phb *phb,
- struct pci_dev *pdev)
-{
- struct iommu_table *tbl = phb->p5ioc2.table_group.tables[0];
-
- if (!tbl->it_map) {
- tbl->it_ops = &pnv_p5ioc2_iommu_ops;
- iommu_init_table(tbl, phb->hose->node);
- iommu_register_group(&phb->p5ioc2.table_group,
- pci_domain_nr(phb->hose->bus), phb->opal_id);
- INIT_LIST_HEAD_RCU(&tbl->it_group_list);
- pnv_pci_link_table_and_group(phb->hose->node, 0,
- tbl, &phb->p5ioc2.table_group);
- }
-
- set_iommu_table_base(&pdev->dev, tbl);
- iommu_add_device(&pdev->dev);
-}
-
-static const struct pci_controller_ops pnv_pci_p5ioc2_controller_ops = {
- .dma_dev_setup = pnv_pci_dma_dev_setup,
-#ifdef CONFIG_PCI_MSI
- .setup_msi_irqs = pnv_setup_msi_irqs,
- .teardown_msi_irqs = pnv_teardown_msi_irqs,
-#endif
-};
-
-static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id,
- void *tce_mem, u64 tce_size)
-{
- struct pnv_phb *phb;
- const __be64 *prop64;
- u64 phb_id;
- int64_t rc;
- static int primary = 1;
- struct iommu_table_group *table_group;
- struct iommu_table *tbl;
-
- pr_info(" Initializing p5ioc2 PHB %s\n", np->full_name);
-
- prop64 = of_get_property(np, "ibm,opal-phbid", NULL);
- if (!prop64) {
- pr_err(" Missing \"ibm,opal-phbid\" property !\n");
- return;
- }
- phb_id = be64_to_cpup(prop64);
- pr_devel(" PHB-ID : 0x%016llx\n", phb_id);
- pr_devel(" TCE AT : 0x%016lx\n", __pa(tce_mem));
- pr_devel(" TCE SZ : 0x%016llx\n", tce_size);
-
- rc = opal_pci_set_phb_tce_memory(phb_id, __pa(tce_mem), tce_size);
- if (rc != OPAL_SUCCESS) {
- pr_err(" Failed to set TCE memory, OPAL error %lld\n", rc);
- return;
- }
-
- phb = memblock_virt_alloc(sizeof(struct pnv_phb), 0);
- phb->hose = pcibios_alloc_controller(np);
- if (!phb->hose) {
- pr_err(" Failed to allocate PCI controller\n");
- return;
- }
-
- spin_lock_init(&phb->lock);
- phb->hose->first_busno = 0;
- phb->hose->last_busno = 0xff;
- phb->hose->private_data = phb;
- phb->hose->controller_ops = pnv_pci_p5ioc2_controller_ops;
- phb->hub_id = hub_id;
- phb->opal_id = phb_id;
- phb->type = PNV_PHB_P5IOC2;
- phb->model = PNV_PHB_MODEL_P5IOC2;
-
- phb->regs = of_iomap(np, 0);
-
- if (phb->regs == NULL)
- pr_err(" Failed to map registers !\n");
- else {
- pr_devel(" P_BUID = 0x%08x\n", in_be32(phb->regs + 0x100));
- pr_devel(" P_IOSZ = 0x%08x\n", in_be32(phb->regs + 0x1b0));
- pr_devel(" P_IO_ST = 0x%08x\n", in_be32(phb->regs + 0x1e0));
- pr_devel(" P_MEM1_H = 0x%08x\n", in_be32(phb->regs + 0x1a0));
- pr_devel(" P_MEM1_L = 0x%08x\n", in_be32(phb->regs + 0x190));
- pr_devel(" P_MSZ1_L = 0x%08x\n", in_be32(phb->regs + 0x1c0));
- pr_devel(" P_MEM_ST = 0x%08x\n", in_be32(phb->regs + 0x1d0));
- pr_devel(" P_MEM2_H = 0x%08x\n", in_be32(phb->regs + 0x2c0));
- pr_devel(" P_MEM2_L = 0x%08x\n", in_be32(phb->regs + 0x2b0));
- pr_devel(" P_MSZ2_H = 0x%08x\n", in_be32(phb->regs + 0x2d0));
- pr_devel(" P_MSZ2_L = 0x%08x\n", in_be32(phb->regs + 0x2e0));
- }
-
- /* Interpret the "ranges" property */
- /* This also maps the I/O region and sets isa_io/mem_base */
- pci_process_bridge_OF_ranges(phb->hose, np, primary);
- primary = 0;
-
- phb->hose->ops = &pnv_pci_ops;
-
- /* Setup MSI support */
- pnv_pci_init_p5ioc2_msis(phb);
-
- /* Setup TCEs */
- phb->dma_dev_setup = pnv_pci_p5ioc2_dma_dev_setup;
- pnv_pci_setup_iommu_table(&phb->p5ioc2.iommu_table,
- tce_mem, tce_size, 0,
- IOMMU_PAGE_SHIFT_4K);
- /*
- * We do not allocate iommu_table as we do not support
- * hotplug or SRIOV on P5IOC2 and therefore iommu_free_table()
- * should not be called for phb->p5ioc2.table_group.tables[0] ever.
- */
- tbl = phb->p5ioc2.table_group.tables[0] = &phb->p5ioc2.iommu_table;
- table_group = &phb->p5ioc2.table_group;
- table_group->tce32_start = tbl->it_offset << tbl->it_page_shift;
- table_group->tce32_size = tbl->it_size << tbl->it_page_shift;
-}
-
-void __init pnv_pci_init_p5ioc2_hub(struct device_node *np)
-{
- struct device_node *phbn;
- const __be64 *prop64;
- u64 hub_id;
- void *tce_mem;
- uint64_t tce_per_phb;
- int64_t rc;
- int phb_count = 0;
-
- pr_info("Probing p5ioc2 IO-Hub %s\n", np->full_name);
-
- prop64 = of_get_property(np, "ibm,opal-hubid", NULL);
- if (!prop64) {
- pr_err(" Missing \"ibm,opal-hubid\" property !\n");
- return;
- }
- hub_id = be64_to_cpup(prop64);
- pr_info(" HUB-ID : 0x%016llx\n", hub_id);
-
- /* Count child PHBs and calculate TCE space per PHB */
- for_each_child_of_node(np, phbn) {
- if (of_device_is_compatible(phbn, "ibm,p5ioc2-pcix") ||
- of_device_is_compatible(phbn, "ibm,p5ioc2-pciex"))
- phb_count++;
- }
-
- if (phb_count <= 0) {
- pr_info(" No PHBs for Hub %s\n", np->full_name);
- return;
- }
-
- tce_per_phb = __rounddown_pow_of_two(P5IOC2_TCE_MEMORY / phb_count);
- pr_info(" Allocating %lld MB of TCE memory per PHB\n",
- tce_per_phb >> 20);
-
- /* Currently allocate 16M of TCE memory for every Hub
- *
- * XXX TODO: Make it chip local if possible
- */
- tce_mem = memblock_virt_alloc(P5IOC2_TCE_MEMORY, P5IOC2_TCE_MEMORY);
- pr_debug(" TCE : 0x%016lx..0x%016lx\n",
- __pa(tce_mem), __pa(tce_mem) + P5IOC2_TCE_MEMORY - 1);
- rc = opal_pci_set_hub_tce_memory(hub_id, __pa(tce_mem),
- P5IOC2_TCE_MEMORY);
- if (rc != OPAL_SUCCESS) {
- pr_err(" Failed to allocate TCE memory, OPAL error %lld\n", rc);
- return;
- }
-
- /* Initialize PHBs */
- for_each_child_of_node(np, phbn) {
- if (of_device_is_compatible(phbn, "ibm,p5ioc2-pcix") ||
- of_device_is_compatible(phbn, "ibm,p5ioc2-pciex")) {
- pnv_pci_init_p5ioc2_phb(phbn, hub_id,
- tce_mem, tce_per_phb);
- tce_mem += tce_per_phb;
- }
- }
-}
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index b1ef84a6c..73c8dc2a3 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -380,10 +380,7 @@ static void pnv_pci_config_check_eeh(struct pci_dn *pdn)
*/
pe_no = pdn->pe_number;
if (pe_no == IODA_INVALID_PE) {
- if (phb->type == PNV_PHB_P5IOC2)
- pe_no = 0;
- else
- pe_no = phb->ioda.reserved_pe;
+ pe_no = phb->ioda.reserved_pe;
}
/*
@@ -805,7 +802,6 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_IBM, 0x3b9, pnv_p7ioc_rc_quirk);
void __init pnv_pci_init(void)
{
struct device_node *np;
- bool found_ioda = false;
pci_add_flags(PCI_CAN_SKIP_ISA_ALIGN);
@@ -813,20 +809,11 @@ void __init pnv_pci_init(void)
if (!firmware_has_feature(FW_FEATURE_OPAL))
return;
- /* Look for IODA IO-Hubs. We don't support mixing IODA
- * and p5ioc2 due to the need to change some global
- * probing flags
- */
+ /* Look for IODA IO-Hubs. */
for_each_compatible_node(np, NULL, "ibm,ioda-hub") {
pnv_pci_init_ioda_hub(np);
- found_ioda = true;
}
- /* Look for p5ioc2 IO-Hubs */
- if (!found_ioda)
- for_each_compatible_node(np, NULL, "ibm,p5ioc2")
- pnv_pci_init_p5ioc2_hub(np);
-
/* Look for ioda2 built-in PHB3's */
for_each_compatible_node(np, NULL, "ibm,ioda2-phb")
pnv_pci_init_ioda2_phb(np);
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 00691a9b9..3f814f382 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -4,16 +4,14 @@
struct pci_dn;
enum pnv_phb_type {
- PNV_PHB_P5IOC2 = 0,
- PNV_PHB_IODA1 = 1,
- PNV_PHB_IODA2 = 2,
- PNV_PHB_NPU = 3,
+ PNV_PHB_IODA1 = 0,
+ PNV_PHB_IODA2 = 1,
+ PNV_PHB_NPU = 2,
};
/* Precise PHB model for error management */
enum pnv_phb_model {
PNV_PHB_MODEL_UNKNOWN,
- PNV_PHB_MODEL_P5IOC2,
PNV_PHB_MODEL_P7IOC,
PNV_PHB_MODEL_PHB3,
PNV_PHB_MODEL_NPU,
@@ -121,81 +119,74 @@ struct pnv_phb {
void (*freeze_pe)(struct pnv_phb *phb, int pe_no);
int (*unfreeze_pe)(struct pnv_phb *phb, int pe_no, int opt);
- union {
- struct {
- struct iommu_table iommu_table;
- struct iommu_table_group table_group;
- } p5ioc2;
-
- struct {
- /* Global bridge info */
- unsigned int total_pe;
- unsigned int reserved_pe;
-
- /* 32-bit MMIO window */
- unsigned int m32_size;
- unsigned int m32_segsize;
- unsigned int m32_pci_base;
-
- /* 64-bit MMIO window */
- unsigned int m64_bar_idx;
- unsigned long m64_size;
- unsigned long m64_segsize;
- unsigned long m64_base;
- unsigned long m64_bar_alloc;
-
- /* IO ports */
- unsigned int io_size;
- unsigned int io_segsize;
- unsigned int io_pci_base;
-
- /* PE allocation bitmap */
- unsigned long *pe_alloc;
- /* PE allocation mutex */
- struct mutex pe_alloc_mutex;
-
- /* M32 & IO segment maps */
- unsigned int *m32_segmap;
- unsigned int *io_segmap;
- struct pnv_ioda_pe *pe_array;
-
- /* IRQ chip */
- int irq_chip_init;
- struct irq_chip irq_chip;
-
- /* Sorted list of used PE's based
- * on the sequence of creation
- */
- struct list_head pe_list;
- struct mutex pe_list_mutex;
-
- /* Reverse map of PEs, will have to extend if
- * we are to support more than 256 PEs, indexed
- * bus { bus, devfn }
- */
- unsigned char pe_rmap[0x10000];
-
- /* 32-bit TCE tables allocation */
- unsigned long tce32_count;
-
- /* Total "weight" for the sake of DMA resources
- * allocation
- */
- unsigned int dma_weight;
- unsigned int dma_pe_count;
-
- /* Sorted list of used PE's, sorted at
- * boot for resource allocation purposes
- */
- struct list_head pe_dma_list;
-
- /* TCE cache invalidate registers (physical and
- * remapped)
- */
- phys_addr_t tce_inval_reg_phys;
- __be64 __iomem *tce_inval_reg;
- } ioda;
- };
+ struct {
+ /* Global bridge info */
+ unsigned int total_pe;
+ unsigned int reserved_pe;
+
+ /* 32-bit MMIO window */
+ unsigned int m32_size;
+ unsigned int m32_segsize;
+ unsigned int m32_pci_base;
+
+ /* 64-bit MMIO window */
+ unsigned int m64_bar_idx;
+ unsigned long m64_size;
+ unsigned long m64_segsize;
+ unsigned long m64_base;
+ unsigned long m64_bar_alloc;
+
+ /* IO ports */
+ unsigned int io_size;
+ unsigned int io_segsize;
+ unsigned int io_pci_base;
+
+ /* PE allocation bitmap */
+ unsigned long *pe_alloc;
+ /* PE allocation mutex */
+ struct mutex pe_alloc_mutex;
+
+ /* M32 & IO segment maps */
+ unsigned int *m32_segmap;
+ unsigned int *io_segmap;
+ struct pnv_ioda_pe *pe_array;
+
+ /* IRQ chip */
+ int irq_chip_init;
+ struct irq_chip irq_chip;
+
+ /* Sorted list of used PE's based
+ * on the sequence of creation
+ */
+ struct list_head pe_list;
+ struct mutex pe_list_mutex;
+
+ /* Reverse map of PEs, will have to extend if
+ * we are to support more than 256 PEs, indexed
+ * bus { bus, devfn }
+ */
+ unsigned char pe_rmap[0x10000];
+
+ /* 32-bit TCE tables allocation */
+ unsigned long tce32_count;
+
+ /* Total "weight" for the sake of DMA resources
+ * allocation
+ */
+ unsigned int dma_weight;
+ unsigned int dma_pe_count;
+
+ /* Sorted list of used PE's, sorted at
+ * boot for resource allocation purposes
+ */
+ struct list_head pe_dma_list;
+
+ /* TCE cache invalidate registers (physical and
+ * remapped)
+ */
+ phys_addr_t tce_inval_reg_phys;
+ __be64 __iomem *tce_inval_reg;
+ } ioda;
/* PHB and hub status structure */
union {
@@ -232,7 +223,6 @@ extern void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
void *tce_mem, u64 tce_size,
u64 dma_offset, unsigned page_shift);
-extern void pnv_pci_init_p5ioc2_hub(struct device_node *np);
extern void pnv_pci_init_ioda_hub(struct device_node *np);
extern void pnv_pci_init_ioda2_phb(struct device_node *np);
extern void pnv_pci_init_npu_phb(struct device_node *np);
diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c
index 503a73f59..0babef111 100644
--- a/arch/powerpc/platforms/powernv/subcore.c
+++ b/arch/powerpc/platforms/powernv/subcore.c
@@ -407,7 +407,7 @@ static DEVICE_ATTR(subcores_per_core, 0644,
static int subcore_init(void)
{
- if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+ if (!cpu_has_feature(CPU_FTR_SUBCORE))
return 0;
/*