From d0b2f91bede3bd5e3d24dd6803e56eee959c1797 Mon Sep 17 00:00:00 2001 From: André Fabian Silva Delgado Date: Thu, 20 Oct 2016 00:10:27 -0300 Subject: Linux-libre 4.8.2-gnu --- arch/powerpc/platforms/powernv/pci-ioda.c | 963 +++++++++++++++++------------- 1 file changed, 560 insertions(+), 403 deletions(-) (limited to 'arch/powerpc/platforms/powernv/pci-ioda.c') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 9e160fa74..38a5c657f 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -55,6 +55,7 @@ #define POWERNV_IOMMU_DEFAULT_LEVELS 1 #define POWERNV_IOMMU_MAX_LEVELS 5 +static const char * const pnv_phb_names[] = { "IODA1", "IODA2", "NPU" }; static void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl); void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, @@ -110,10 +111,24 @@ static int __init iommu_setup(char *str) } early_param("iommu", iommu_setup); -static inline bool pnv_pci_is_mem_pref_64(unsigned long flags) +static inline bool pnv_pci_is_m64(struct pnv_phb *phb, struct resource *r) { - return ((flags & (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH)) == - (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH)); + /* + * WARNING: We cannot rely on the resource flags. The Linux PCI + * allocation code sometimes decides to put a 64-bit prefetchable + * BAR in the 32-bit window, so we have to compare the addresses. + * + * For simplicity we only test resource start. + */ + return (r->start >= phb->ioda.m64_base && + r->start < (phb->ioda.m64_base + phb->ioda.m64_size)); +} + +static inline bool pnv_pci_is_m64_flags(unsigned long resource_flags) +{ + unsigned long flags = (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH); + + return (resource_flags & flags) == flags; } static struct pnv_ioda_pe *pnv_ioda_init_pe(struct pnv_phb *phb, int pe_no) @@ -141,16 +156,14 @@ static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no) static struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb) { - unsigned long pe; + long pe; - do { - pe = find_next_zero_bit(phb->ioda.pe_alloc, - phb->ioda.total_pe_num, 0); - if (pe >= phb->ioda.total_pe_num) - return NULL; - } while(test_and_set_bit(pe, phb->ioda.pe_alloc)); + for (pe = phb->ioda.total_pe_num - 1; pe >= 0; pe--) { + if (!test_and_set_bit(pe, phb->ioda.pe_alloc)) + return pnv_ioda_init_pe(phb, pe); + } - return pnv_ioda_init_pe(phb, pe); + return NULL; } static void pnv_ioda_free_pe(struct pnv_ioda_pe *pe) @@ -193,18 +206,15 @@ static int pnv_ioda2_init_m64(struct pnv_phb *phb) goto fail; } - /* Mark the M64 BAR assigned */ - set_bit(phb->ioda.m64_bar_idx, &phb->ioda.m64_bar_alloc); - /* - * Strip off the segment used by the reserved PE, which is - * expected to be 0 or last one of PE capabicity. + * Exclude the segments for reserved and root bus PE, which + * are first or last two PEs. */ r = &phb->hose->mem_resources[1]; if (phb->ioda.reserved_pe_idx == 0) - r->start += phb->ioda.m64_segsize; + r->start += (2 * phb->ioda.m64_segsize); else if (phb->ioda.reserved_pe_idx == (phb->ioda.total_pe_num - 1)) - r->end -= phb->ioda.m64_segsize; + r->end -= (2 * phb->ioda.m64_segsize); else pr_warn(" Cannot strip M64 segment for reserved PE#%d\n", phb->ioda.reserved_pe_idx); @@ -234,7 +244,7 @@ static void pnv_ioda_reserve_dev_m64_pe(struct pci_dev *pdev, sgsz = phb->ioda.m64_segsize; for (i = 0; i <= PCI_ROM_RESOURCE; i++) { r = &pdev->resource[i]; - if (!r->parent || !pnv_pci_is_mem_pref_64(r->flags)) + if (!r->parent || !pnv_pci_is_m64(phb, r)) continue; start = _ALIGN_DOWN(r->start - base, sgsz); @@ -284,14 +294,14 @@ static int pnv_ioda1_init_m64(struct pnv_phb *phb) } /* - * Exclude the segment used by the reserved PE, which - * is expected to be 0 or last supported PE#. + * Exclude the segments for reserved and root bus PE, which + * are first or last two PEs. */ r = &phb->hose->mem_resources[1]; if (phb->ioda.reserved_pe_idx == 0) - r->start += phb->ioda.m64_segsize; + r->start += (2 * phb->ioda.m64_segsize); else if (phb->ioda.reserved_pe_idx == (phb->ioda.total_pe_num - 1)) - r->end -= phb->ioda.m64_segsize; + r->end -= (2 * phb->ioda.m64_segsize); else WARN(1, "Wrong reserved PE#%d on PHB#%d\n", phb->ioda.reserved_pe_idx, phb->hose->global_number); @@ -406,6 +416,7 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb) struct pci_controller *hose = phb->hose; struct device_node *dn = hose->dn; struct resource *res; + u32 m64_range[2], i; const u32 *r; u64 pci_addr; @@ -426,6 +437,30 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb) return; } + /* + * Find the available M64 BAR range and pickup the last one for + * covering the whole 64-bits space. We support only one range. + */ + if (of_property_read_u32_array(dn, "ibm,opal-available-m64-ranges", + m64_range, 2)) { + /* In absence of the property, assume 0..15 */ + m64_range[0] = 0; + m64_range[1] = 16; + } + /* We only support 64 bits in our allocator */ + if (m64_range[1] > 63) { + pr_warn("%s: Limiting M64 range to 63 (from %d) on PHB#%x\n", + __func__, m64_range[1], phb->hose->global_number); + m64_range[1] = 63; + } + /* Empty range, no m64 */ + if (m64_range[1] <= m64_range[0]) { + pr_warn("%s: M64 empty, disabling M64 usage on PHB#%x\n", + __func__, phb->hose->global_number); + return; + } + + /* Configure M64 informations */ res = &hose->mem_resources[1]; res->name = dn->full_name; res->start = of_translate_address(dn, r + 2); @@ -438,11 +473,28 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb) phb->ioda.m64_segsize = phb->ioda.m64_size / phb->ioda.total_pe_num; phb->ioda.m64_base = pci_addr; - pr_info(" MEM64 0x%016llx..0x%016llx -> 0x%016llx\n", - res->start, res->end, pci_addr); + /* This lines up nicely with the display from processing OF ranges */ + pr_info(" MEM 0x%016llx..0x%016llx -> 0x%016llx (M64 #%d..%d)\n", + res->start, res->end, pci_addr, m64_range[0], + m64_range[0] + m64_range[1] - 1); + + /* Mark all M64 used up by default */ + phb->ioda.m64_bar_alloc = (unsigned long)-1; /* Use last M64 BAR to cover M64 window */ - phb->ioda.m64_bar_idx = 15; + m64_range[1]--; + phb->ioda.m64_bar_idx = m64_range[0] + m64_range[1]; + + pr_info(" Using M64 #%d as default window\n", phb->ioda.m64_bar_idx); + + /* Mark remaining ones free */ + for (i = m64_range[0]; i < m64_range[1]; i++) + clear_bit(i, &phb->ioda.m64_bar_alloc); + + /* + * Setup init functions for M64 based on IODA version, IODA3 uses + * the IODA2 code. + */ if (phb->type == PNV_PHB_IODA1) phb->init_m64 = pnv_ioda1_init_m64; else @@ -597,7 +649,7 @@ static int pnv_ioda_get_pe_state(struct pnv_phb *phb, int pe_no) * but in the meantime, we need to protect them to avoid warnings */ #ifdef CONFIG_PCI_MSI -static struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev) +struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev) { struct pci_controller *hose = pci_bus_to_host(dev->bus); struct pnv_phb *phb = hose->private_data; @@ -715,7 +767,6 @@ static int pnv_ioda_set_peltv(struct pnv_phb *phb, return 0; } -#ifdef CONFIG_PCI_IOV static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) { struct pci_dev *parent; @@ -750,9 +801,11 @@ static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) } rid_end = pe->rid + (count << 8); } else { +#ifdef CONFIG_PCI_IOV if (pe->flags & PNV_IODA_PE_VF) parent = pe->parent_dev; else +#endif parent = pe->pdev->bus->self; bcomp = OpalPciBusAll; dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER; @@ -762,7 +815,7 @@ static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) /* Clear the reverse map */ for (rid = pe->rid; rid < rid_end; rid++) - phb->ioda.pe_rmap[rid] = 0; + phb->ioda.pe_rmap[rid] = IODA_INVALID_PE; /* Release from all parents PELT-V */ while (parent) { @@ -790,11 +843,12 @@ static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) pe->pbus = NULL; pe->pdev = NULL; +#ifdef CONFIG_PCI_IOV pe->parent_dev = NULL; +#endif return 0; } -#endif /* CONFIG_PCI_IOV */ static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) { @@ -1025,6 +1079,16 @@ static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe) pci_name(dev)); continue; } + + /* + * In partial hotplug case, the PCI device might be still + * associated with the PE and needn't attach it to the PE + * again. + */ + if (pdn->pe_number != IODA_INVALID_PE) + continue; + + pe->device_count++; pdn->pcidev = dev; pdn->pe_number = pe->pe_number; if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate) @@ -1043,9 +1107,26 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all) struct pci_controller *hose = pci_bus_to_host(bus); struct pnv_phb *phb = hose->private_data; struct pnv_ioda_pe *pe = NULL; + unsigned int pe_num; + + /* + * In partial hotplug case, the PE instance might be still alive. + * We should reuse it instead of allocating a new one. + */ + pe_num = phb->ioda.pe_rmap[bus->number << 8]; + if (pe_num != IODA_INVALID_PE) { + pe = &phb->ioda.pe_array[pe_num]; + pnv_ioda_setup_same_PE(bus, pe); + return NULL; + } + + /* PE number for root bus should have been reserved */ + if (pci_is_root_bus(bus) && + phb->ioda.root_pe_idx != IODA_INVALID_PE) + pe = &phb->ioda.pe_array[phb->ioda.root_pe_idx]; /* Check if PE is determined by M64 */ - if (phb->pick_m64_pe) + if (!pe && phb->pick_m64_pe) pe = phb->pick_m64_pe(bus, all); /* The PE number isn't pinned by M64 */ @@ -1157,30 +1238,6 @@ static void pnv_ioda_setup_npu_PEs(struct pci_bus *bus) pnv_ioda_setup_npu_PE(pdev); } -static void pnv_ioda_setup_PEs(struct pci_bus *bus) -{ - struct pci_dev *dev; - - pnv_ioda_setup_bus_PE(bus, false); - - list_for_each_entry(dev, &bus->devices, bus_list) { - if (dev->subordinate) { - if (pci_pcie_type(dev) == PCI_EXP_TYPE_PCI_BRIDGE) - pnv_ioda_setup_bus_PE(dev->subordinate, true); - else - pnv_ioda_setup_PEs(dev->subordinate); - } - } -} - -/* - * Configure PEs so that the downstream PCI buses and devices - * could have their associated PE#. Unfortunately, we didn't - * figure out the way to identify the PLX bridge yet. So we - * simply put the PCI bus and the subordinate behind the root - * port to PE# here. The game rule here is expected to be changed - * as soon as we can detected PLX bridge correctly. - */ static void pnv_pci_ioda_setup_PEs(void) { struct pci_controller *hose, *tmp; @@ -1188,22 +1245,11 @@ static void pnv_pci_ioda_setup_PEs(void) list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { phb = hose->private_data; - - /* M64 layout might affect PE allocation */ - if (phb->reserve_m64_pe) - phb->reserve_m64_pe(hose->bus, NULL, true); - - /* - * On NPU PHB, we expect separate PEs for individual PCI - * functions. PCI bus dependent PEs are required for the - * remaining types of PHBs. - */ if (phb->type == PNV_PHB_NPU) { /* PE#0 is needed for error reporting */ pnv_ioda_reserve_pe(phb, 0); pnv_ioda_setup_npu_PEs(hose->bus); - } else - pnv_ioda_setup_PEs(hose->bus); + } } } @@ -1729,7 +1775,14 @@ static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, } } -static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl, +static inline __be64 __iomem *pnv_ioda_get_inval_reg(struct pnv_phb *phb, + bool real_mode) +{ + return real_mode ? (__be64 __iomem *)(phb->regs_phys + 0x210) : + (phb->regs + 0x210); +} + +static void pnv_pci_p7ioc_tce_invalidate(struct iommu_table *tbl, unsigned long index, unsigned long npages, bool rm) { struct iommu_table_group_link *tgl = list_first_entry_or_null( @@ -1737,33 +1790,17 @@ static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl, next); struct pnv_ioda_pe *pe = container_of(tgl->table_group, struct pnv_ioda_pe, table_group); - __be64 __iomem *invalidate = rm ? - (__be64 __iomem *)pe->phb->ioda.tce_inval_reg_phys : - pe->phb->ioda.tce_inval_reg; + __be64 __iomem *invalidate = pnv_ioda_get_inval_reg(pe->phb, rm); unsigned long start, end, inc; - const unsigned shift = tbl->it_page_shift; start = __pa(((__be64 *)tbl->it_base) + index - tbl->it_offset); end = __pa(((__be64 *)tbl->it_base) + index - tbl->it_offset + npages - 1); - /* BML uses this case for p6/p7/galaxy2: Shift addr and put in node */ - if (tbl->it_busno) { - start <<= shift; - end <<= shift; - inc = 128ull << shift; - start |= tbl->it_busno; - end |= tbl->it_busno; - } else if (tbl->it_type & TCE_PCI_SWINV_PAIR) { - /* p7ioc-style invalidation, 2 TCEs per write */ - start |= (1ull << 63); - end |= (1ull << 63); - inc = 16; - } else { - /* Default (older HW) */ - inc = 128; - } - + /* p7ioc-style invalidation, 2 TCEs per write */ + start |= (1ull << 63); + end |= (1ull << 63); + inc = 16; end |= inc - 1; /* round up end to be different than start */ mb(); /* Ensure above stores are visible */ @@ -1784,13 +1821,13 @@ static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl, static int pnv_ioda1_tce_build(struct iommu_table *tbl, long index, long npages, unsigned long uaddr, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { int ret = pnv_tce_build(tbl, index, npages, uaddr, direction, attrs); - if (!ret && (tbl->it_type & TCE_PCI_SWINV_CREATE)) - pnv_pci_ioda1_tce_invalidate(tbl, index, npages, false); + if (!ret) + pnv_pci_p7ioc_tce_invalidate(tbl, index, npages, false); return ret; } @@ -1801,9 +1838,8 @@ static int pnv_ioda1_tce_xchg(struct iommu_table *tbl, long index, { long ret = pnv_tce_xchg(tbl, index, hpa, direction); - if (!ret && (tbl->it_type & - (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE))) - pnv_pci_ioda1_tce_invalidate(tbl, index, 1, false); + if (!ret) + pnv_pci_p7ioc_tce_invalidate(tbl, index, 1, false); return ret; } @@ -1814,8 +1850,7 @@ static void pnv_ioda1_tce_free(struct iommu_table *tbl, long index, { pnv_tce_free(tbl, index, npages); - if (tbl->it_type & TCE_PCI_SWINV_FREE) - pnv_pci_ioda1_tce_invalidate(tbl, index, npages, false); + pnv_pci_p7ioc_tce_invalidate(tbl, index, npages, false); } static struct iommu_table_ops pnv_ioda1_iommu_ops = { @@ -1827,45 +1862,42 @@ static struct iommu_table_ops pnv_ioda1_iommu_ops = { .get = pnv_tce_get, }; -#define TCE_KILL_INVAL_ALL PPC_BIT(0) -#define TCE_KILL_INVAL_PE PPC_BIT(1) -#define TCE_KILL_INVAL_TCE PPC_BIT(2) +#define PHB3_TCE_KILL_INVAL_ALL PPC_BIT(0) +#define PHB3_TCE_KILL_INVAL_PE PPC_BIT(1) +#define PHB3_TCE_KILL_INVAL_ONE PPC_BIT(2) -void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_phb *phb, bool rm) +void pnv_pci_phb3_tce_invalidate_entire(struct pnv_phb *phb, bool rm) { - const unsigned long val = TCE_KILL_INVAL_ALL; + __be64 __iomem *invalidate = pnv_ioda_get_inval_reg(phb, rm); + const unsigned long val = PHB3_TCE_KILL_INVAL_ALL; mb(); /* Ensure previous TCE table stores are visible */ if (rm) - __raw_rm_writeq(cpu_to_be64(val), - (__be64 __iomem *) - phb->ioda.tce_inval_reg_phys); + __raw_rm_writeq(cpu_to_be64(val), invalidate); else - __raw_writeq(cpu_to_be64(val), phb->ioda.tce_inval_reg); + __raw_writeq(cpu_to_be64(val), invalidate); } -static inline void pnv_pci_ioda2_tce_invalidate_pe(struct pnv_ioda_pe *pe) +static inline void pnv_pci_phb3_tce_invalidate_pe(struct pnv_ioda_pe *pe) { /* 01xb - invalidate TCEs that match the specified PE# */ - unsigned long val = TCE_KILL_INVAL_PE | (pe->pe_number & 0xFF); - struct pnv_phb *phb = pe->phb; - - if (!phb->ioda.tce_inval_reg) - return; + __be64 __iomem *invalidate = pnv_ioda_get_inval_reg(pe->phb, false); + unsigned long val = PHB3_TCE_KILL_INVAL_PE | (pe->pe_number & 0xFF); mb(); /* Ensure above stores are visible */ - __raw_writeq(cpu_to_be64(val), phb->ioda.tce_inval_reg); + __raw_writeq(cpu_to_be64(val), invalidate); } -static void pnv_pci_ioda2_do_tce_invalidate(unsigned pe_number, bool rm, - __be64 __iomem *invalidate, unsigned shift, - unsigned long index, unsigned long npages) +static void pnv_pci_phb3_tce_invalidate(struct pnv_ioda_pe *pe, bool rm, + unsigned shift, unsigned long index, + unsigned long npages) { + __be64 __iomem *invalidate = pnv_ioda_get_inval_reg(pe->phb, rm); unsigned long start, end, inc; /* We'll invalidate DMA address in PE scope */ - start = TCE_KILL_INVAL_TCE; - start |= (pe_number & 0xFF); + start = PHB3_TCE_KILL_INVAL_ONE; + start |= (pe->pe_number & 0xFF); end = start; /* Figure out the start, end and step */ @@ -1883,6 +1915,17 @@ static void pnv_pci_ioda2_do_tce_invalidate(unsigned pe_number, bool rm, } } +static inline void pnv_pci_ioda2_tce_invalidate_pe(struct pnv_ioda_pe *pe) +{ + struct pnv_phb *phb = pe->phb; + + if (phb->model == PNV_PHB_MODEL_PHB3 && phb->regs) + pnv_pci_phb3_tce_invalidate_pe(pe); + else + opal_pci_tce_kill(phb->opal_id, OPAL_PCI_TCE_KILL_PE, + pe->pe_number, 0, 0, 0); +} + static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl, unsigned long index, unsigned long npages, bool rm) { @@ -1891,34 +1934,43 @@ static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl, list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) { struct pnv_ioda_pe *pe = container_of(tgl->table_group, struct pnv_ioda_pe, table_group); - __be64 __iomem *invalidate = rm ? - (__be64 __iomem *)pe->phb->ioda.tce_inval_reg_phys : - pe->phb->ioda.tce_inval_reg; + struct pnv_phb *phb = pe->phb; + unsigned int shift = tbl->it_page_shift; - if (pe->phb->type == PNV_PHB_NPU) { + if (phb->type == PNV_PHB_NPU) { /* * The NVLink hardware does not support TCE kill * per TCE entry so we have to invalidate * the entire cache for it. */ - pnv_pci_ioda2_tce_invalidate_entire(pe->phb, rm); + pnv_pci_phb3_tce_invalidate_entire(phb, rm); continue; } - pnv_pci_ioda2_do_tce_invalidate(pe->pe_number, rm, - invalidate, tbl->it_page_shift, - index, npages); + if (phb->model == PNV_PHB_MODEL_PHB3 && phb->regs) + pnv_pci_phb3_tce_invalidate(pe, rm, shift, + index, npages); + else if (rm) + opal_rm_pci_tce_kill(phb->opal_id, + OPAL_PCI_TCE_KILL_PAGES, + pe->pe_number, 1u << shift, + index << shift, npages); + else + opal_pci_tce_kill(phb->opal_id, + OPAL_PCI_TCE_KILL_PAGES, + pe->pe_number, 1u << shift, + index << shift, npages); } } static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index, long npages, unsigned long uaddr, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { int ret = pnv_tce_build(tbl, index, npages, uaddr, direction, attrs); - if (!ret && (tbl->it_type & TCE_PCI_SWINV_CREATE)) + if (!ret) pnv_pci_ioda2_tce_invalidate(tbl, index, npages, false); return ret; @@ -1930,8 +1982,7 @@ static int pnv_ioda2_tce_xchg(struct iommu_table *tbl, long index, { long ret = pnv_tce_xchg(tbl, index, hpa, direction); - if (!ret && (tbl->it_type & - (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE))) + if (!ret) pnv_pci_ioda2_tce_invalidate(tbl, index, 1, false); return ret; @@ -1943,8 +1994,7 @@ static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index, { pnv_tce_free(tbl, index, npages); - if (tbl->it_type & TCE_PCI_SWINV_FREE) - pnv_pci_ioda2_tce_invalidate(tbl, index, npages, false); + pnv_pci_ioda2_tce_invalidate(tbl, index, npages, false); } static void pnv_ioda2_table_free(struct iommu_table *tbl) @@ -2113,12 +2163,6 @@ found: base * PNV_IODA1_DMA32_SEGSIZE, IOMMU_PAGE_SHIFT_4K); - /* OPAL variant of P7IOC SW invalidated TCEs */ - if (phb->ioda.tce_inval_reg) - tbl->it_type |= (TCE_PCI_SWINV_CREATE | - TCE_PCI_SWINV_FREE | - TCE_PCI_SWINV_PAIR); - tbl->it_ops = &pnv_ioda1_iommu_ops; pe->table_group.tce32_start = tbl->it_offset << tbl->it_page_shift; pe->table_group.tce32_size = tbl->it_size << tbl->it_page_shift; @@ -2241,8 +2285,6 @@ static long pnv_pci_ioda2_create_table(struct iommu_table_group *table_group, } tbl->it_ops = &pnv_ioda2_iommu_ops; - if (pe->phb->ioda.tce_inval_reg) - tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE); *ptbl = tbl; @@ -2291,10 +2333,6 @@ static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe) if (!pnv_iommu_bypass_disabled) pnv_pci_ioda2_set_bypass(pe, true); - /* OPAL variant of PHB3 invalidated TCEs */ - if (pe->phb->ioda.tce_inval_reg) - tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE); - /* * Setting table base here only for carrying iommu_group * further down to let iommu_add_device() do the job. @@ -2505,19 +2543,6 @@ static void pnv_pci_ioda_setup_iommu_api(void) static void pnv_pci_ioda_setup_iommu_api(void) { }; #endif -static void pnv_pci_ioda_setup_opal_tce_kill(struct pnv_phb *phb) -{ - const __be64 *swinvp; - - /* OPAL variant of PHB3 invalidated TCEs */ - swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL); - if (!swinvp) - return; - - phb->ioda.tce_inval_reg_phys = be64_to_cpup(swinvp); - phb->ioda.tce_inval_reg = ioremap(phb->ioda.tce_inval_reg_phys, 8); -} - static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned shift, unsigned levels, unsigned long limit, unsigned long *current_offset, unsigned long *total_allocated) @@ -2658,6 +2683,9 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, { int64_t rc; + if (!pnv_pci_ioda_pe_dma_weight(pe)) + return; + /* TVE #1 is selected by PCI address bit 59 */ pe->tce_bypass_base = 1ull << 59; @@ -2689,49 +2717,6 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, pnv_ioda_setup_bus_dma(pe, pe->pbus); } -static void pnv_ioda_setup_dma(struct pnv_phb *phb) -{ - struct pci_controller *hose = phb->hose; - struct pnv_ioda_pe *pe; - unsigned int weight; - - /* If we have more PE# than segments available, hand out one - * per PE until we run out and let the rest fail. If not, - * then we assign at least one segment per PE, plus more based - * on the amount of devices under that PE - */ - pr_info("PCI: Domain %04x has %d available 32-bit DMA segments\n", - hose->global_number, phb->ioda.dma32_count); - - pnv_pci_ioda_setup_opal_tce_kill(phb); - - /* Walk our PE list and configure their DMA segments */ - list_for_each_entry(pe, &phb->ioda.pe_list, list) { - weight = pnv_pci_ioda_pe_dma_weight(pe); - if (!weight) - continue; - - /* - * For IODA2 compliant PHB3, we needn't care about the weight. - * The all available 32-bits DMA space will be assigned to - * the specific PE. - */ - if (phb->type == PNV_PHB_IODA1) { - pnv_pci_ioda1_setup_dma_pe(phb, pe); - } else if (phb->type == PNV_PHB_IODA2) { - pe_info(pe, "Assign DMA32 space\n"); - pnv_pci_ioda2_setup_dma_pe(phb, pe); - } else if (phb->type == PNV_PHB_NPU) { - /* - * We initialise the DMA space for an NPU PHB - * after setup of the PHB is complete as we - * point the NPU TVT to the the same location - * as the PHB3 TVT. - */ - } - } -} - #ifdef CONFIG_PCI_MSI static void pnv_ioda2_msi_eoi(struct irq_data *d) { @@ -2748,12 +2733,13 @@ static void pnv_ioda2_msi_eoi(struct irq_data *d) } -static void set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq) +void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq) { struct irq_data *idata; struct irq_chip *ichip; - if (phb->type != PNV_PHB_IODA2) + /* The MSI EOI OPAL call is only needed on PHB3 */ + if (phb->model != PNV_PHB_MODEL_PHB3) return; if (!phb->ioda.irq_chip_init) { @@ -2770,157 +2756,6 @@ static void set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq) irq_set_chip(virq, &phb->ioda.irq_chip); } -#ifdef CONFIG_CXL_BASE - -struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev) -{ - struct pci_controller *hose = pci_bus_to_host(dev->bus); - - return of_node_get(hose->dn); -} -EXPORT_SYMBOL(pnv_pci_get_phb_node); - -int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode) -{ - struct pci_controller *hose = pci_bus_to_host(dev->bus); - struct pnv_phb *phb = hose->private_data; - struct pnv_ioda_pe *pe; - int rc; - - pe = pnv_ioda_get_pe(dev); - if (!pe) - return -ENODEV; - - pe_info(pe, "Switching PHB to CXL\n"); - - rc = opal_pci_set_phb_cxl_mode(phb->opal_id, mode, pe->pe_number); - if (rc) - dev_err(&dev->dev, "opal_pci_set_phb_cxl_mode failed: %i\n", rc); - - return rc; -} -EXPORT_SYMBOL(pnv_phb_to_cxl_mode); - -/* Find PHB for cxl dev and allocate MSI hwirqs? - * Returns the absolute hardware IRQ number - */ -int pnv_cxl_alloc_hwirqs(struct pci_dev *dev, int num) -{ - struct pci_controller *hose = pci_bus_to_host(dev->bus); - struct pnv_phb *phb = hose->private_data; - int hwirq = msi_bitmap_alloc_hwirqs(&phb->msi_bmp, num); - - if (hwirq < 0) { - dev_warn(&dev->dev, "Failed to find a free MSI\n"); - return -ENOSPC; - } - - return phb->msi_base + hwirq; -} -EXPORT_SYMBOL(pnv_cxl_alloc_hwirqs); - -void pnv_cxl_release_hwirqs(struct pci_dev *dev, int hwirq, int num) -{ - struct pci_controller *hose = pci_bus_to_host(dev->bus); - struct pnv_phb *phb = hose->private_data; - - msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq - phb->msi_base, num); -} -EXPORT_SYMBOL(pnv_cxl_release_hwirqs); - -void pnv_cxl_release_hwirq_ranges(struct cxl_irq_ranges *irqs, - struct pci_dev *dev) -{ - struct pci_controller *hose = pci_bus_to_host(dev->bus); - struct pnv_phb *phb = hose->private_data; - int i, hwirq; - - for (i = 1; i < CXL_IRQ_RANGES; i++) { - if (!irqs->range[i]) - continue; - pr_devel("cxl release irq range 0x%x: offset: 0x%lx limit: %ld\n", - i, irqs->offset[i], - irqs->range[i]); - hwirq = irqs->offset[i] - phb->msi_base; - msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq, - irqs->range[i]); - } -} -EXPORT_SYMBOL(pnv_cxl_release_hwirq_ranges); - -int pnv_cxl_alloc_hwirq_ranges(struct cxl_irq_ranges *irqs, - struct pci_dev *dev, int num) -{ - struct pci_controller *hose = pci_bus_to_host(dev->bus); - struct pnv_phb *phb = hose->private_data; - int i, hwirq, try; - - memset(irqs, 0, sizeof(struct cxl_irq_ranges)); - - /* 0 is reserved for the multiplexed PSL DSI interrupt */ - for (i = 1; i < CXL_IRQ_RANGES && num; i++) { - try = num; - while (try) { - hwirq = msi_bitmap_alloc_hwirqs(&phb->msi_bmp, try); - if (hwirq >= 0) - break; - try /= 2; - } - if (!try) - goto fail; - - irqs->offset[i] = phb->msi_base + hwirq; - irqs->range[i] = try; - pr_devel("cxl alloc irq range 0x%x: offset: 0x%lx limit: %li\n", - i, irqs->offset[i], irqs->range[i]); - num -= try; - } - if (num) - goto fail; - - return 0; -fail: - pnv_cxl_release_hwirq_ranges(irqs, dev); - return -ENOSPC; -} -EXPORT_SYMBOL(pnv_cxl_alloc_hwirq_ranges); - -int pnv_cxl_get_irq_count(struct pci_dev *dev) -{ - struct pci_controller *hose = pci_bus_to_host(dev->bus); - struct pnv_phb *phb = hose->private_data; - - return phb->msi_bmp.irq_count; -} -EXPORT_SYMBOL(pnv_cxl_get_irq_count); - -int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq, - unsigned int virq) -{ - struct pci_controller *hose = pci_bus_to_host(dev->bus); - struct pnv_phb *phb = hose->private_data; - unsigned int xive_num = hwirq - phb->msi_base; - struct pnv_ioda_pe *pe; - int rc; - - if (!(pe = pnv_ioda_get_pe(dev))) - return -ENODEV; - - /* Assign XIVE to PE */ - rc = opal_pci_set_xive_pe(phb->opal_id, pe->pe_number, xive_num); - if (rc) { - pe_warn(pe, "%s: OPAL error %d setting msi_base 0x%x " - "hwirq 0x%x XIVE 0x%x PE\n", - pci_name(dev), rc, phb->msi_base, hwirq, xive_num); - return -EIO; - } - set_msi_irq_chip(phb, virq); - - return 0; -} -EXPORT_SYMBOL(pnv_cxl_ioda_msi_setup); -#endif - static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, unsigned int hwirq, unsigned int virq, unsigned int is_64, struct msi_msg *msg) @@ -2977,7 +2812,7 @@ static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, } msg->data = be32_to_cpu(data); - set_msi_irq_chip(phb, virq); + pnv_set_msi_irq_chip(phb, virq); pr_devel("%s: %s-bit MSI on hwirq %x (xive #%d)," " address=%x_%08x data=%x PE# %d\n", @@ -3043,7 +2878,7 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev) res = &pdev->resource[i + PCI_IOV_RESOURCES]; if (!res->flags || res->parent) continue; - if (!pnv_pci_is_mem_pref_64(res->flags)) { + if (!pnv_pci_is_m64_flags(res->flags)) { dev_warn(&pdev->dev, "Don't support SR-IOV with" " non M64 VF BAR%d: %pR. \n", i, res); @@ -3138,7 +2973,7 @@ static void pnv_ioda_setup_pe_res(struct pnv_ioda_pe *pe, index++; } } else if ((res->flags & IORESOURCE_MEM) && - !pnv_pci_is_mem_pref_64(res->flags)) { + !pnv_pci_is_m64(phb, res)) { region.start = res->start - phb->hose->mem_offset[0] - phb->ioda.m32_pci_base; @@ -3198,41 +3033,6 @@ static void pnv_ioda_setup_pe_seg(struct pnv_ioda_pe *pe) } } -static void pnv_pci_ioda_setup_seg(void) -{ - struct pci_controller *tmp, *hose; - struct pnv_phb *phb; - struct pnv_ioda_pe *pe; - - list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { - phb = hose->private_data; - - /* NPU PHB does not support IO or MMIO segmentation */ - if (phb->type == PNV_PHB_NPU) - continue; - - list_for_each_entry(pe, &phb->ioda.pe_list, list) { - pnv_ioda_setup_pe_seg(pe); - } - } -} - -static void pnv_pci_ioda_setup_DMA(void) -{ - struct pci_controller *hose, *tmp; - struct pnv_phb *phb; - - list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { - pnv_ioda_setup_dma(hose->private_data); - - /* Mark the PHB initialization done */ - phb = hose->private_data; - phb->initialized = 1; - } - - pnv_pci_ioda_setup_iommu_api(); -} - static void pnv_pci_ioda_create_dbgfs(void) { #ifdef CONFIG_DEBUG_FS @@ -3243,6 +3043,9 @@ static void pnv_pci_ioda_create_dbgfs(void) list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { phb = hose->private_data; + /* Notify initialization of PHB done */ + phb->initialized = 1; + sprintf(name, "PCI%04x", hose->global_number); phb->dbgfs = debugfs_create_dir(name, powerpc_debugfs_root); if (!phb->dbgfs) @@ -3255,9 +3058,7 @@ static void pnv_pci_ioda_create_dbgfs(void) static void pnv_pci_ioda_fixup(void) { pnv_pci_ioda_setup_PEs(); - pnv_pci_ioda_setup_seg(); - pnv_pci_ioda_setup_DMA(); - + pnv_pci_ioda_setup_iommu_api(); pnv_pci_ioda_create_dbgfs(); #ifdef CONFIG_EEH @@ -3297,9 +3098,12 @@ static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus, bridge = bridge->bus->self; } - /* We fail back to M32 if M64 isn't supported */ - if (phb->ioda.m64_segsize && - pnv_pci_is_mem_pref_64(type)) + /* + * We fall back to M32 if M64 isn't supported. We enforce the M64 + * alignment for any 64-bit resource, PCIe doesn't care and + * bridges only do 64-bit prefetchable anyway. + */ + if (phb->ioda.m64_segsize && pnv_pci_is_m64_flags(type)) return phb->ioda.m64_segsize; if (type & IORESOURCE_MEM) return phb->ioda.m32_segsize; @@ -3307,6 +3111,115 @@ static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus, return phb->ioda.io_segsize; } +/* + * We are updating root port or the upstream port of the + * bridge behind the root port with PHB's windows in order + * to accommodate the changes on required resources during + * PCI (slot) hotplug, which is connected to either root + * port or the downstream ports of PCIe switch behind the + * root port. + */ +static void pnv_pci_fixup_bridge_resources(struct pci_bus *bus, + unsigned long type) +{ + struct pci_controller *hose = pci_bus_to_host(bus); + struct pnv_phb *phb = hose->private_data; + struct pci_dev *bridge = bus->self; + struct resource *r, *w; + bool msi_region = false; + int i; + + /* Check if we need apply fixup to the bridge's windows */ + if (!pci_is_root_bus(bridge->bus) && + !pci_is_root_bus(bridge->bus->self->bus)) + return; + + /* Fixup the resources */ + for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++) { + r = &bridge->resource[PCI_BRIDGE_RESOURCES + i]; + if (!r->flags || !r->parent) + continue; + + w = NULL; + if (r->flags & type & IORESOURCE_IO) + w = &hose->io_resource; + else if (pnv_pci_is_m64(phb, r) && + (type & IORESOURCE_PREFETCH) && + phb->ioda.m64_segsize) + w = &hose->mem_resources[1]; + else if (r->flags & type & IORESOURCE_MEM) { + w = &hose->mem_resources[0]; + msi_region = true; + } + + r->start = w->start; + r->end = w->end; + + /* The 64KB 32-bits MSI region shouldn't be included in + * the 32-bits bridge window. Otherwise, we can see strange + * issues. One of them is EEH error observed on Garrison. + * + * Exclude top 1MB region which is the minimal alignment of + * 32-bits bridge window. + */ + if (msi_region) { + r->end += 0x10000; + r->end -= 0x100000; + } + } +} + +static void pnv_pci_setup_bridge(struct pci_bus *bus, unsigned long type) +{ + struct pci_controller *hose = pci_bus_to_host(bus); + struct pnv_phb *phb = hose->private_data; + struct pci_dev *bridge = bus->self; + struct pnv_ioda_pe *pe; + bool all = (pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE); + + /* Extend bridge's windows if necessary */ + pnv_pci_fixup_bridge_resources(bus, type); + + /* The PE for root bus should be realized before any one else */ + if (!phb->ioda.root_pe_populated) { + pe = pnv_ioda_setup_bus_PE(phb->hose->bus, false); + if (pe) { + phb->ioda.root_pe_idx = pe->pe_number; + phb->ioda.root_pe_populated = true; + } + } + + /* Don't assign PE to PCI bus, which doesn't have subordinate devices */ + if (list_empty(&bus->devices)) + return; + + /* Reserve PEs according to used M64 resources */ + if (phb->reserve_m64_pe) + phb->reserve_m64_pe(bus, NULL, all); + + /* + * Assign PE. We might run here because of partial hotplug. + * For the case, we just pick up the existing PE and should + * not allocate resources again. + */ + pe = pnv_ioda_setup_bus_PE(bus, all); + if (!pe) + return; + + pnv_ioda_setup_pe_seg(pe); + switch (phb->type) { + case PNV_PHB_IODA1: + pnv_pci_ioda1_setup_dma_pe(phb, pe); + break; + case PNV_PHB_IODA2: + pnv_pci_ioda2_setup_dma_pe(phb, pe); + break; + default: + pr_warn("%s: No DMA for PHB#%d (type %d)\n", + __func__, phb->hose->global_number, phb->type); + } +} + #ifdef CONFIG_PCI_IOV static resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev, int resno) @@ -3346,7 +3259,7 @@ static resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev, /* Prevent enabling devices for which we couldn't properly * assign a PE */ -static bool pnv_pci_enable_device_hook(struct pci_dev *dev) +bool pnv_pci_enable_device_hook(struct pci_dev *dev) { struct pci_controller *hose = pci_bus_to_host(dev->bus); struct pnv_phb *phb = hose->private_data; @@ -3367,6 +3280,201 @@ static bool pnv_pci_enable_device_hook(struct pci_dev *dev) return true; } +static long pnv_pci_ioda1_unset_window(struct iommu_table_group *table_group, + int num) +{ + struct pnv_ioda_pe *pe = container_of(table_group, + struct pnv_ioda_pe, table_group); + struct pnv_phb *phb = pe->phb; + unsigned int idx; + long rc; + + pe_info(pe, "Removing DMA window #%d\n", num); + for (idx = 0; idx < phb->ioda.dma32_count; idx++) { + if (phb->ioda.dma32_segmap[idx] != pe->pe_number) + continue; + + rc = opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number, + idx, 0, 0ul, 0ul, 0ul); + if (rc != OPAL_SUCCESS) { + pe_warn(pe, "Failure %ld unmapping DMA32 segment#%d\n", + rc, idx); + return rc; + } + + phb->ioda.dma32_segmap[idx] = IODA_INVALID_PE; + } + + pnv_pci_unlink_table_and_group(table_group->tables[num], table_group); + return OPAL_SUCCESS; +} + +static void pnv_pci_ioda1_release_pe_dma(struct pnv_ioda_pe *pe) +{ + unsigned int weight = pnv_pci_ioda_pe_dma_weight(pe); + struct iommu_table *tbl = pe->table_group.tables[0]; + int64_t rc; + + if (!weight) + return; + + rc = pnv_pci_ioda1_unset_window(&pe->table_group, 0); + if (rc != OPAL_SUCCESS) + return; + + pnv_pci_p7ioc_tce_invalidate(tbl, tbl->it_offset, tbl->it_size, false); + if (pe->table_group.group) { + iommu_group_put(pe->table_group.group); + WARN_ON(pe->table_group.group); + } + + free_pages(tbl->it_base, get_order(tbl->it_size << 3)); + iommu_free_table(tbl, "pnv"); +} + +static void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe) +{ + struct iommu_table *tbl = pe->table_group.tables[0]; + unsigned int weight = pnv_pci_ioda_pe_dma_weight(pe); +#ifdef CONFIG_IOMMU_API + int64_t rc; +#endif + + if (!weight) + return; + +#ifdef CONFIG_IOMMU_API + rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0); + if (rc) + pe_warn(pe, "OPAL error %ld release DMA window\n", rc); +#endif + + pnv_pci_ioda2_set_bypass(pe, false); + if (pe->table_group.group) { + iommu_group_put(pe->table_group.group); + WARN_ON(pe->table_group.group); + } + + pnv_pci_ioda2_table_free_pages(tbl); + iommu_free_table(tbl, "pnv"); +} + +static void pnv_ioda_free_pe_seg(struct pnv_ioda_pe *pe, + unsigned short win, + unsigned int *map) +{ + struct pnv_phb *phb = pe->phb; + int idx; + int64_t rc; + + for (idx = 0; idx < phb->ioda.total_pe_num; idx++) { + if (map[idx] != pe->pe_number) + continue; + + if (win == OPAL_M64_WINDOW_TYPE) + rc = opal_pci_map_pe_mmio_window(phb->opal_id, + phb->ioda.reserved_pe_idx, win, + idx / PNV_IODA1_M64_SEGS, + idx % PNV_IODA1_M64_SEGS); + else + rc = opal_pci_map_pe_mmio_window(phb->opal_id, + phb->ioda.reserved_pe_idx, win, 0, idx); + + if (rc != OPAL_SUCCESS) + pe_warn(pe, "Error %ld unmapping (%d) segment#%d\n", + rc, win, idx); + + map[idx] = IODA_INVALID_PE; + } +} + +static void pnv_ioda_release_pe_seg(struct pnv_ioda_pe *pe) +{ + struct pnv_phb *phb = pe->phb; + + if (phb->type == PNV_PHB_IODA1) { + pnv_ioda_free_pe_seg(pe, OPAL_IO_WINDOW_TYPE, + phb->ioda.io_segmap); + pnv_ioda_free_pe_seg(pe, OPAL_M32_WINDOW_TYPE, + phb->ioda.m32_segmap); + pnv_ioda_free_pe_seg(pe, OPAL_M64_WINDOW_TYPE, + phb->ioda.m64_segmap); + } else if (phb->type == PNV_PHB_IODA2) { + pnv_ioda_free_pe_seg(pe, OPAL_M32_WINDOW_TYPE, + phb->ioda.m32_segmap); + } +} + +static void pnv_ioda_release_pe(struct pnv_ioda_pe *pe) +{ + struct pnv_phb *phb = pe->phb; + struct pnv_ioda_pe *slave, *tmp; + + list_del(&pe->list); + switch (phb->type) { + case PNV_PHB_IODA1: + pnv_pci_ioda1_release_pe_dma(pe); + break; + case PNV_PHB_IODA2: + pnv_pci_ioda2_release_pe_dma(pe); + break; + default: + WARN_ON(1); + } + + pnv_ioda_release_pe_seg(pe); + pnv_ioda_deconfigure_pe(pe->phb, pe); + + /* Release slave PEs in the compound PE */ + if (pe->flags & PNV_IODA_PE_MASTER) { + list_for_each_entry_safe(slave, tmp, &pe->slaves, list) { + list_del(&slave->list); + pnv_ioda_free_pe(slave); + } + } + + /* + * The PE for root bus can be removed because of hotplug in EEH + * recovery for fenced PHB error. We need to mark the PE dead so + * that it can be populated again in PCI hot add path. The PE + * shouldn't be destroyed as it's the global reserved resource. + */ + if (phb->ioda.root_pe_populated && + phb->ioda.root_pe_idx == pe->pe_number) + phb->ioda.root_pe_populated = false; + else + pnv_ioda_free_pe(pe); +} + +static void pnv_pci_release_device(struct pci_dev *pdev) +{ + struct pci_controller *hose = pci_bus_to_host(pdev->bus); + struct pnv_phb *phb = hose->private_data; + struct pci_dn *pdn = pci_get_pdn(pdev); + struct pnv_ioda_pe *pe; + + if (pdev->is_virtfn) + return; + + if (!pdn || pdn->pe_number == IODA_INVALID_PE) + return; + + /* + * PCI hotplug can happen as part of EEH error recovery. The @pdn + * isn't removed and added afterwards in this scenario. We should + * set the PE number in @pdn to an invalid one. Otherwise, the PE's + * device count is decreased on removing devices while failing to + * be increased on adding devices. It leads to unbalanced PE's device + * count and eventually make normal PCI hotplug path broken. + */ + pe = &phb->ioda.pe_array[pdn->pe_number]; + pdn->pe_number = IODA_INVALID_PE; + + WARN_ON(--pe->device_count < 0); + if (pe->device_count == 0) + pnv_ioda_release_pe(pe); +} + static void pnv_pci_ioda_shutdown(struct pci_controller *hose) { struct pnv_phb *phb = hose->private_data; @@ -3383,7 +3491,9 @@ static const struct pci_controller_ops pnv_pci_ioda_controller_ops = { .teardown_msi_irqs = pnv_teardown_msi_irqs, #endif .enable_device_hook = pnv_pci_enable_device_hook, + .release_device = pnv_pci_release_device, .window_alignment = pnv_pci_window_alignment, + .setup_bridge = pnv_pci_setup_bridge, .reset_secondary_bus = pnv_pci_reset_secondary_bus, .dma_set_mask = pnv_pci_ioda_dma_set_mask, .dma_get_required_mask = pnv_pci_ioda_dma_get_required_mask, @@ -3411,6 +3521,26 @@ static const struct pci_controller_ops pnv_npu_ioda_controller_ops = { .shutdown = pnv_pci_ioda_shutdown, }; +#ifdef CONFIG_CXL_BASE +const struct pci_controller_ops pnv_cxl_cx4_ioda_controller_ops = { + .dma_dev_setup = pnv_pci_dma_dev_setup, + .dma_bus_setup = pnv_pci_dma_bus_setup, +#ifdef CONFIG_PCI_MSI + .setup_msi_irqs = pnv_cxl_cx4_setup_msi_irqs, + .teardown_msi_irqs = pnv_cxl_cx4_teardown_msi_irqs, +#endif + .enable_device_hook = pnv_cxl_enable_device_hook, + .disable_device = pnv_cxl_disable_device, + .release_device = pnv_pci_release_device, + .window_alignment = pnv_pci_window_alignment, + .setup_bridge = pnv_pci_setup_bridge, + .reset_secondary_bus = pnv_pci_reset_secondary_bus, + .dma_set_mask = pnv_pci_ioda_dma_set_mask, + .dma_get_required_mask = pnv_pci_ioda_dma_get_required_mask, + .shutdown = pnv_pci_ioda_shutdown, +}; +#endif + static void __init pnv_pci_init_ioda_phb(struct device_node *np, u64 hub_id, int ioda_type) { @@ -3418,6 +3548,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, struct pnv_phb *phb; unsigned long size, m64map_off, m32map_off, pemap_off; unsigned long iomap_off = 0, dma32map_off = 0; + struct resource r; const __be64 *prop64; const __be32 *prop32; int len; @@ -3426,7 +3557,11 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, void *aux; long rc; - pr_info("Initializing IODA%d OPAL PHB %s\n", ioda_type, np->full_name); + if (!of_device_is_available(np)) + return; + + pr_info("Initializing %s PHB (%s)\n", + pnv_phb_names[ioda_type], of_node_full_name(np)); prop64 = of_get_property(np, "ibm,opal-phbid", NULL); if (!prop64) { @@ -3477,9 +3612,12 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, pci_process_bridge_OF_ranges(hose, np, !hose->global_number); /* Get registers */ - phb->regs = of_iomap(np, 0); - if (phb->regs == NULL) - pr_err(" Failed to map registers !\n"); + if (!of_address_to_resource(np, 0, &r)) { + phb->regs_phys = r.start; + phb->regs = ioremap(r.start, resource_size(&r)); + if (phb->regs == NULL) + pr_err(" Failed to map registers !\n"); + } /* Initialize more IODA stuff */ phb->ioda.total_pe_num = 1; @@ -3490,6 +3628,10 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, if (prop32) phb->ioda.reserved_pe_idx = be32_to_cpup(prop32); + /* Invalidate RID to PE# mapping */ + for (segno = 0; segno < ARRAY_SIZE(phb->ioda.pe_rmap); segno++) + phb->ioda.pe_rmap[segno] = IODA_INVALID_PE; + /* Parse 64-bit MMIO range */ pnv_ioda_parse_m64_window(phb); @@ -3541,7 +3683,22 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, phb->ioda.dma32_segmap[segno] = IODA_INVALID_PE; } phb->ioda.pe_array = aux + pemap_off; - set_bit(phb->ioda.reserved_pe_idx, phb->ioda.pe_alloc); + + /* + * Choose PE number for root bus, which shouldn't have + * M64 resources consumed by its child devices. To pick + * the PE number adjacent to the reserved one if possible. + */ + pnv_ioda_reserve_pe(phb, phb->ioda.reserved_pe_idx); + if (phb->ioda.reserved_pe_idx == 0) { + phb->ioda.root_pe_idx = 1; + pnv_ioda_reserve_pe(phb, phb->ioda.root_pe_idx); + } else if (phb->ioda.reserved_pe_idx == (phb->ioda.total_pe_num - 1)) { + phb->ioda.root_pe_idx = phb->ioda.reserved_pe_idx - 1; + pnv_ioda_reserve_pe(phb, phb->ioda.root_pe_idx); + } else { + phb->ioda.root_pe_idx = IODA_INVALID_PE; + } INIT_LIST_HEAD(&phb->ioda.pe_list); mutex_init(&phb->ioda.pe_list_mutex); -- cgit v1.2.3-54-g00ecf