From d0b2f91bede3bd5e3d24dd6803e56eee959c1797 Mon Sep 17 00:00:00 2001 From: André Fabian Silva Delgado Date: Thu, 20 Oct 2016 00:10:27 -0300 Subject: Linux-libre 4.8.2-gnu --- arch/powerpc/mm/8xx_mmu.c | 131 ++++++++++++++------- arch/powerpc/mm/copro_fault.c | 2 +- arch/powerpc/mm/fault.c | 4 +- arch/powerpc/mm/hash64_4k.c | 18 +-- arch/powerpc/mm/hash64_64k.c | 39 ++++--- arch/powerpc/mm/hash_native_64.c | 42 ++++--- arch/powerpc/mm/hash_utils_64.c | 206 +++++++++++++++++---------------- arch/powerpc/mm/hugepage-hash64.c | 17 +-- arch/powerpc/mm/hugetlbpage-hash64.c | 4 +- arch/powerpc/mm/hugetlbpage-radix.c | 39 +++---- arch/powerpc/mm/hugetlbpage.c | 7 ++ arch/powerpc/mm/init_32.c | 5 +- arch/powerpc/mm/init_64.c | 22 ++++ arch/powerpc/mm/mem.c | 18 ++- arch/powerpc/mm/mmu_context_book3s64.c | 5 +- arch/powerpc/mm/mmu_decl.h | 3 +- arch/powerpc/mm/numa.c | 84 +++++++------- arch/powerpc/mm/pgtable-book3s64.c | 7 +- arch/powerpc/mm/pgtable-radix.c | 20 ++-- arch/powerpc/mm/pgtable.c | 2 +- arch/powerpc/mm/pgtable_32.c | 2 +- arch/powerpc/mm/tlb-radix.c | 155 +++++++++++++++++++++---- arch/powerpc/mm/tlb_hash32.c | 11 -- arch/powerpc/mm/tlb_nohash.c | 6 - 24 files changed, 534 insertions(+), 315 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c index 949100577..6c5025e81 100644 --- a/arch/powerpc/mm/8xx_mmu.c +++ b/arch/powerpc/mm/8xx_mmu.c @@ -13,62 +13,115 @@ */ #include +#include +#include #include "mmu_decl.h" +#define IMMR_SIZE (FIX_IMMR_SIZE << PAGE_SHIFT) + extern int __map_without_ltlbs; + /* - * MMU_init_hw does the chip-specific initialization of the MMU hardware. + * Return PA for this VA if it is in IMMR area, or 0 */ -void __init MMU_init_hw(void) +phys_addr_t v_block_mapped(unsigned long va) { - /* Nothing to do for the time being but keep it similar to other PPC */ + unsigned long p = PHYS_IMMR_BASE; + + if (__map_without_ltlbs) + return 0; + if (va >= VIRT_IMMR_BASE && va < VIRT_IMMR_BASE + IMMR_SIZE) + return p + va - VIRT_IMMR_BASE; + return 0; +} + +/* + * Return VA for a given PA or 0 if not mapped + */ +unsigned long p_block_mapped(phys_addr_t pa) +{ + unsigned long p = PHYS_IMMR_BASE; + + if (__map_without_ltlbs) + return 0; + if (pa >= p && pa < p + IMMR_SIZE) + return VIRT_IMMR_BASE + pa - p; + return 0; } -#define LARGE_PAGE_SIZE_4M (1<<22) #define LARGE_PAGE_SIZE_8M (1<<23) -#define LARGE_PAGE_SIZE_64M (1<<26) -unsigned long __init mmu_mapin_ram(unsigned long top) +/* + * MMU_init_hw does the chip-specific initialization of the MMU hardware. 
+ */ +void __init MMU_init_hw(void) { - unsigned long v, s, mapped; - phys_addr_t p; + /* PIN up to the 3 first 8Mb after IMMR in DTLB table */ +#ifdef CONFIG_PIN_TLB + unsigned long ctr = mfspr(SPRN_MD_CTR) & 0xfe000000; + unsigned long flags = 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY; +#ifdef CONFIG_PIN_TLB_IMMR + int i = 29; +#else + int i = 28; +#endif + unsigned long addr = 0; + unsigned long mem = total_lowmem; + + for (; i < 32 && mem >= LARGE_PAGE_SIZE_8M; i++) { + mtspr(SPRN_MD_CTR, ctr | (i << 8)); + mtspr(SPRN_MD_EPN, (unsigned long)__va(addr) | MD_EVALID); + mtspr(SPRN_MD_TWC, MD_PS8MEG | MD_SVALID); + mtspr(SPRN_MD_RPN, addr | flags | _PAGE_PRESENT); + addr += LARGE_PAGE_SIZE_8M; + mem -= LARGE_PAGE_SIZE_8M; + } +#endif +} - v = KERNELBASE; - p = 0; - s = top; +static void mmu_mapin_immr(void) +{ + unsigned long p = PHYS_IMMR_BASE; + unsigned long v = VIRT_IMMR_BASE; + unsigned long f = pgprot_val(PAGE_KERNEL_NCG); + int offset; - if (__map_without_ltlbs) - return 0; + for (offset = 0; offset < IMMR_SIZE; offset += PAGE_SIZE) + map_page(v + offset, p + offset, f); +} -#ifdef CONFIG_PPC_4K_PAGES - while (s >= LARGE_PAGE_SIZE_8M) { - pmd_t *pmdp; - unsigned long val = p | MD_PS8MEG; +/* Address of instructions to patch */ +#ifndef CONFIG_PIN_TLB_IMMR +extern unsigned int DTLBMiss_jmp; +#endif +extern unsigned int DTLBMiss_cmp, FixupDAR_cmp; - pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v); - *pmdp++ = __pmd(val); - *pmdp++ = __pmd(val + LARGE_PAGE_SIZE_4M); +void mmu_patch_cmp_limit(unsigned int *addr, unsigned long mapped) +{ + unsigned int instr = *addr; - v += LARGE_PAGE_SIZE_8M; - p += LARGE_PAGE_SIZE_8M; - s -= LARGE_PAGE_SIZE_8M; - } -#else /* CONFIG_PPC_16K_PAGES */ - while (s >= LARGE_PAGE_SIZE_64M) { - pmd_t *pmdp; - unsigned long val = p | MD_PS8MEG; + instr &= 0xffff0000; + instr |= (unsigned long)__va(mapped) >> 16; + patch_instruction(addr, instr); +} - pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v); - *pmdp++ = __pmd(val); +unsigned long __init mmu_mapin_ram(unsigned long top) +{ + unsigned long mapped; - v += LARGE_PAGE_SIZE_64M; - p += LARGE_PAGE_SIZE_64M; - s -= LARGE_PAGE_SIZE_64M; - } + if (__map_without_ltlbs) { + mapped = 0; + mmu_mapin_immr(); +#ifndef CONFIG_PIN_TLB_IMMR + patch_instruction(&DTLBMiss_jmp, PPC_INST_NOP); #endif + } else { + mapped = top & ~(LARGE_PAGE_SIZE_8M - 1); + } - mapped = top - s; + mmu_patch_cmp_limit(&DTLBMiss_cmp, mapped); + mmu_patch_cmp_limit(&FixupDAR_cmp, mapped); /* If the size of RAM is not an exact power of two, we may not * have covered RAM in its entirety with 8 MiB @@ -77,7 +130,8 @@ unsigned long __init mmu_mapin_ram(unsigned long top) * coverage with normal-sized pages (or other reasons) do not * attempt to allocate outside the allowed range. 
*/ - memblock_set_current_limit(mapped); + if (mapped) + memblock_set_current_limit(mapped); return mapped; } @@ -90,13 +144,8 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base, */ BUG_ON(first_memblock_base != 0); -#ifdef CONFIG_PIN_TLB /* 8xx can only access 24MB at the moment */ memblock_set_current_limit(min_t(u64, first_memblock_size, 0x01800000)); -#else - /* 8xx can only access 8MB at the moment */ - memblock_set_current_limit(min_t(u64, first_memblock_size, 0x00800000)); -#endif } /* diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c index 6527882ce..bb0354222 100644 --- a/arch/powerpc/mm/copro_fault.c +++ b/arch/powerpc/mm/copro_fault.c @@ -75,7 +75,7 @@ int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea, } ret = 0; - *flt = handle_mm_fault(mm, vma, ea, is_write ? FAULT_FLAG_WRITE : 0); + *flt = handle_mm_fault(vma, ea, is_write ? FAULT_FLAG_WRITE : 0); if (unlikely(*flt & VM_FAULT_ERROR)) { if (*flt & VM_FAULT_OOM) { ret = -ENOMEM; diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index a67c6d781..bb1ffc559 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include #include @@ -429,7 +429,7 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(mm, vma, address, flags); + fault = handle_mm_fault(vma, address, flags); if (unlikely(fault & (VM_FAULT_RETRY|VM_FAULT_ERROR))) { if (fault & VM_FAULT_SIGSEGV) goto bad_area; diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/hash64_4k.c index 6333b273d..42c702b3b 100644 --- a/arch/powerpc/mm/hash64_4k.c +++ b/arch/powerpc/mm/hash64_4k.c @@ -70,8 +70,8 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT; - if (ppc_md.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_4K, - MMU_PAGE_4K, ssize, flags) == -1) + if (mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_4K, + MMU_PAGE_4K, ssize, flags) == -1) old_pte &= ~_PAGE_HPTEFLAGS; } @@ -84,21 +84,23 @@ repeat: hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; /* Insert into the hash table, primary slot */ - slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0, - MMU_PAGE_4K, MMU_PAGE_4K, ssize); + slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0, + MMU_PAGE_4K, MMU_PAGE_4K, ssize); /* * Primary is full, try the secondary */ if (unlikely(slot == -1)) { hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; - slot = ppc_md.hpte_insert(hpte_group, vpn, pa, - rflags, HPTE_V_SECONDARY, - MMU_PAGE_4K, MMU_PAGE_4K, ssize); + slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, + rflags, + HPTE_V_SECONDARY, + MMU_PAGE_4K, + MMU_PAGE_4K, ssize); if (slot == -1) { if (mftb() & 0x1) hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; - ppc_md.hpte_remove(hpte_group); + mmu_hash_ops.hpte_remove(hpte_group); /* * FIXME!! Should be try the group from which we removed ? 
*/ diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c index 16644e1f4..3bbbea073 100644 --- a/arch/powerpc/mm/hash64_64k.c +++ b/arch/powerpc/mm/hash64_64k.c @@ -133,9 +133,9 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += hidx & _PTEIDX_GROUP_IX; - ret = ppc_md.hpte_updatepp(slot, rflags, vpn, - MMU_PAGE_4K, MMU_PAGE_4K, - ssize, flags); + ret = mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, + MMU_PAGE_4K, MMU_PAGE_4K, + ssize, flags); /* *if we failed because typically the HPTE wasn't really here * we try an insertion. @@ -166,21 +166,22 @@ repeat: hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; /* Insert into the hash table, primary slot */ - slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0, - MMU_PAGE_4K, MMU_PAGE_4K, ssize); + slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0, + MMU_PAGE_4K, MMU_PAGE_4K, ssize); /* * Primary is full, try the secondary */ if (unlikely(slot == -1)) { hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; - slot = ppc_md.hpte_insert(hpte_group, vpn, pa, - rflags, HPTE_V_SECONDARY, - MMU_PAGE_4K, MMU_PAGE_4K, ssize); + slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, + rflags, HPTE_V_SECONDARY, + MMU_PAGE_4K, MMU_PAGE_4K, + ssize); if (slot == -1) { if (mftb() & 0x1) hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; - ppc_md.hpte_remove(hpte_group); + mmu_hash_ops.hpte_remove(hpte_group); /* * FIXME!! Should be try the group from which we removed ? */ @@ -272,8 +273,9 @@ int __hash_page_64K(unsigned long ea, unsigned long access, slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT; - if (ppc_md.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_64K, - MMU_PAGE_64K, ssize, flags) == -1) + if (mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_64K, + MMU_PAGE_64K, ssize, + flags) == -1) old_pte &= ~_PAGE_HPTEFLAGS; } @@ -286,21 +288,24 @@ repeat: hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; /* Insert into the hash table, primary slot */ - slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0, - MMU_PAGE_64K, MMU_PAGE_64K, ssize); + slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0, + MMU_PAGE_64K, MMU_PAGE_64K, + ssize); /* * Primary is full, try the secondary */ if (unlikely(slot == -1)) { hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; - slot = ppc_md.hpte_insert(hpte_group, vpn, pa, - rflags, HPTE_V_SECONDARY, - MMU_PAGE_64K, MMU_PAGE_64K, ssize); + slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, + rflags, + HPTE_V_SECONDARY, + MMU_PAGE_64K, + MMU_PAGE_64K, ssize); if (slot == -1) { if (mftb() & 0x1) hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; - ppc_md.hpte_remove(hpte_group); + mmu_hash_ops.hpte_remove(hpte_group); /* * FIXME!! Should be try the group from which we removed ? */ diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index f8a871a72..0e4e9654b 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -55,7 +55,7 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize) * We need 14 to 65 bits of va for a tlibe of 4K page * With vpn we ignore the lower VPN_SHIFT bits already. 
* And top two bits are already ignored because we can - * only accomadate 76 bits in a 64 bit vpn with a VPN_SHIFT + * only accomodate 76 bits in a 64 bit vpn with a VPN_SHIFT * of 12. */ va = vpn << VPN_SHIFT; @@ -64,15 +64,15 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize) * Older versions of the architecture (2.02 and earler) require the * masking of the top 16 bits. */ - va &= ~(0xffffULL << 48); + if (mmu_has_feature(MMU_FTR_TLBIE_CROP_VA)) + va &= ~(0xffffULL << 48); switch (psize) { case MMU_PAGE_4K: /* clear out bits after (52) [0....52.....63] */ va &= ~((1ul << (64 - 52)) - 1); va |= ssize << 8; - sllp = ((mmu_psize_defs[apsize].sllp & SLB_VSID_L) >> 6) | - ((mmu_psize_defs[apsize].sllp & SLB_VSID_LP) >> 4); + sllp = get_sllp_encoding(apsize); va |= sllp << 5; asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2) : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206) @@ -113,15 +113,15 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize) * Older versions of the architecture (2.02 and earler) require the * masking of the top 16 bits. */ - va &= ~(0xffffULL << 48); + if (mmu_has_feature(MMU_FTR_TLBIE_CROP_VA)) + va &= ~(0xffffULL << 48); switch (psize) { case MMU_PAGE_4K: /* clear out bits after(52) [0....52.....63] */ va &= ~((1ul << (64 - 52)) - 1); va |= ssize << 8; - sllp = ((mmu_psize_defs[apsize].sllp & SLB_VSID_L) >> 6) | - ((mmu_psize_defs[apsize].sllp & SLB_VSID_LP) >> 4); + sllp = get_sllp_encoding(apsize); va |= sllp << 5; asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)" : : "r"(va) : "memory"); @@ -605,7 +605,7 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot, * crashdump and all bets are off anyway. * * TODO: add batching support when enabled. remember, no dynamic memory here, - * athough there is the control page available... + * although there is the control page available... 
*/ static void native_hpte_clear(void) { @@ -723,23 +723,29 @@ static void native_flush_hash_range(unsigned long number, int local) local_irq_restore(flags); } -static int native_update_partition_table(u64 patb1) +static int native_register_proc_table(unsigned long base, unsigned long page_size, + unsigned long table_size) { + unsigned long patb1 = base << 25; /* VSID */ + + patb1 |= (page_size << 5); /* sllp */ + patb1 |= table_size; + partition_tb->patb1 = cpu_to_be64(patb1); return 0; } void __init hpte_init_native(void) { - ppc_md.hpte_invalidate = native_hpte_invalidate; - ppc_md.hpte_updatepp = native_hpte_updatepp; - ppc_md.hpte_updateboltedpp = native_hpte_updateboltedpp; - ppc_md.hpte_insert = native_hpte_insert; - ppc_md.hpte_remove = native_hpte_remove; - ppc_md.hpte_clear_all = native_hpte_clear; - ppc_md.flush_hash_range = native_flush_hash_range; - ppc_md.hugepage_invalidate = native_hugepage_invalidate; + mmu_hash_ops.hpte_invalidate = native_hpte_invalidate; + mmu_hash_ops.hpte_updatepp = native_hpte_updatepp; + mmu_hash_ops.hpte_updateboltedpp = native_hpte_updateboltedpp; + mmu_hash_ops.hpte_insert = native_hpte_insert; + mmu_hash_ops.hpte_remove = native_hpte_remove; + mmu_hash_ops.hpte_clear_all = native_hpte_clear; + mmu_hash_ops.flush_hash_range = native_flush_hash_range; + mmu_hash_ops.hugepage_invalidate = native_hugepage_invalidate; if (cpu_has_feature(CPU_FTR_ARCH_300)) - ppc_md.update_partition_table = native_update_partition_table; + register_process_table = native_register_proc_table; } diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 2971ea18c..0821556e1 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -58,6 +59,7 @@ #include #include #include +#include #ifdef DEBUG #define DBG(fmt...) udbg_printf(fmt) @@ -87,10 +89,6 @@ * */ -#ifdef CONFIG_U3_DART -extern unsigned long dart_tablebase; -#endif /* CONFIG_U3_DART */ - static unsigned long _SDR1; struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; EXPORT_SYMBOL_GPL(mmu_psize_defs); @@ -120,6 +118,8 @@ static u8 *linear_map_hash_slots; static unsigned long linear_map_hash_count; static DEFINE_SPINLOCK(linear_map_hash_lock); #endif /* CONFIG_DEBUG_PAGEALLOC */ +struct mmu_hash_ops mmu_hash_ops; +EXPORT_SYMBOL(mmu_hash_ops); /* There are definitions of page sizes arrays to be used when none * is provided by the firmware. 
@@ -278,9 +278,10 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend, hash = hpt_hash(vpn, shift, ssize); hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); - BUG_ON(!ppc_md.hpte_insert); - ret = ppc_md.hpte_insert(hpteg, vpn, paddr, tprot, - HPTE_V_BOLTED, psize, psize, ssize); + BUG_ON(!mmu_hash_ops.hpte_insert); + ret = mmu_hash_ops.hpte_insert(hpteg, vpn, paddr, tprot, + HPTE_V_BOLTED, psize, psize, + ssize); if (ret < 0) break; @@ -305,11 +306,11 @@ int htab_remove_mapping(unsigned long vstart, unsigned long vend, shift = mmu_psize_defs[psize].shift; step = 1 << shift; - if (!ppc_md.hpte_removebolted) + if (!mmu_hash_ops.hpte_removebolted) return -ENODEV; for (vaddr = vstart; vaddr < vend; vaddr += step) { - rc = ppc_md.hpte_removebolted(vaddr, psize, ssize); + rc = mmu_hash_ops.hpte_removebolted(vaddr, psize, ssize); if (rc == -ENOENT) { ret = -ENOENT; continue; @@ -321,6 +322,15 @@ int htab_remove_mapping(unsigned long vstart, unsigned long vend, return ret; } +static bool disable_1tb_segments = false; + +static int __init parse_disable_1tb_segments(char *p) +{ + disable_1tb_segments = true; + return 0; +} +early_param("disable_1tb_segments", parse_disable_1tb_segments); + static int __init htab_dt_scan_seg_sizes(unsigned long node, const char *uname, int depth, void *data) @@ -339,6 +349,12 @@ static int __init htab_dt_scan_seg_sizes(unsigned long node, for (; size >= 4; size -= 4, ++prop) { if (be32_to_cpu(prop[0]) == 40) { DBG("1T segment support detected\n"); + + if (disable_1tb_segments) { + DBG("1T segments disabled by command line\n"); + break; + } + cur_cpu_spec->mmu_features |= MMU_FTR_1T_SEGMENT; return 1; } @@ -347,11 +363,6 @@ static int __init htab_dt_scan_seg_sizes(unsigned long node, return 0; } -static void __init htab_init_seg_sizes(void) -{ - of_scan_flat_dt(htab_dt_scan_seg_sizes, NULL); -} - static int __init get_idx_from_shift(unsigned int shift) { int idx = -1; @@ -514,7 +525,8 @@ static bool might_have_hea(void) * we will never see an HEA ethernet device. */ #ifdef CONFIG_IBMEBUS - return !cpu_has_feature(CPU_FTR_ARCH_207S); + return !cpu_has_feature(CPU_FTR_ARCH_207S) && + !firmware_has_feature(FW_FEATURE_SPLPAR); #else return false; #endif @@ -522,7 +534,7 @@ static bool might_have_hea(void) #endif /* #ifdef CONFIG_PPC_64K_PAGES */ -static void __init htab_init_page_sizes(void) +static void __init htab_scan_page_sizes(void) { int rc; @@ -537,17 +549,23 @@ static void __init htab_init_page_sizes(void) * Try to find the available page sizes in the device-tree */ rc = of_scan_flat_dt(htab_dt_scan_page_sizes, NULL); - if (rc != 0) /* Found */ - goto found; - - /* - * Not in the device-tree, let's fallback on known size - * list for 16M capable GP & GR - */ - if (mmu_has_feature(MMU_FTR_16M_PAGE)) + if (rc == 0 && early_mmu_has_feature(MMU_FTR_16M_PAGE)) { + /* + * Nothing in the device-tree, but the CPU supports 16M pages, + * so let's fallback on a known size list for 16M capable CPUs. + */ memcpy(mmu_psize_defs, mmu_psize_defaults_gp, sizeof(mmu_psize_defaults_gp)); -found: + } + +#ifdef CONFIG_HUGETLB_PAGE + /* Reserve 16G huge page memory sections for huge pages */ + of_scan_flat_dt(htab_dt_scan_hugepage_blocks, NULL); +#endif /* CONFIG_HUGETLB_PAGE */ +} + +static void __init htab_init_page_sizes(void) +{ if (!debug_pagealloc_enabled()) { /* * Pick a size for the linear mapping. Currently, we only @@ -580,7 +598,7 @@ found: * would stop us accessing the HEA ethernet. So if we * have the chance of ever seeing one, stay at 4k. 
*/ - if (!might_have_hea() || !machine_is(pseries)) + if (!might_have_hea()) mmu_io_psize = MMU_PAGE_64K; } else mmu_ci_restrictions = 1; @@ -613,11 +631,6 @@ found: ,mmu_psize_defs[mmu_vmemmap_psize].shift #endif ); - -#ifdef CONFIG_HUGETLB_PAGE - /* Reserve 16G huge page memory sections for huge pages */ - of_scan_flat_dt(htab_dt_scan_hugepage_blocks, NULL); -#endif /* CONFIG_HUGETLB_PAGE */ } static int __init htab_dt_scan_pftsize(unsigned long node, @@ -699,10 +712,9 @@ int remove_section_mapping(unsigned long start, unsigned long end) #endif /* CONFIG_MEMORY_HOTPLUG */ static void __init hash_init_partition_table(phys_addr_t hash_table, - unsigned long pteg_count) + unsigned long htab_size) { unsigned long ps_field; - unsigned long htab_size; unsigned long patb_size = 1UL << PATB_SIZE_SHIFT; /* @@ -710,7 +722,7 @@ static void __init hash_init_partition_table(phys_addr_t hash_table, * We can ignore that for lpid 0 */ ps_field = 0; - htab_size = __ilog2(pteg_count) - 11; + htab_size = __ilog2(htab_size) - 18; BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 24), "Partition table size too large."); partition_tb = __va(memblock_alloc_base(patb_size, patb_size, @@ -724,7 +736,7 @@ static void __init hash_init_partition_table(phys_addr_t hash_table, * For now UPRT is 0 for us. */ partition_tb->patb1 = 0; - DBG("Partition table %p\n", partition_tb); + pr_info("Partition table %p\n", partition_tb); /* * update partition table control register, * 64 K size. @@ -738,17 +750,11 @@ static void __init htab_initialize(void) unsigned long table; unsigned long pteg_count; unsigned long prot; - unsigned long base = 0, size = 0, limit; + unsigned long base = 0, size = 0; struct memblock_region *reg; DBG(" -> htab_initialize()\n"); - /* Initialize segment sizes */ - htab_init_seg_sizes(); - - /* Initialize page sizes */ - htab_init_page_sizes(); - if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) { mmu_kernel_ssize = MMU_SEGSIZE_1T; mmu_highuser_ssize = MMU_SEGSIZE_1T; @@ -764,7 +770,8 @@ static void __init htab_initialize(void) htab_hash_mask = pteg_count - 1; - if (firmware_has_feature(FW_FEATURE_LPAR)) { + if (firmware_has_feature(FW_FEATURE_LPAR) || + firmware_has_feature(FW_FEATURE_PS3_LV1)) { /* Using a hypervisor which owns the htab */ htab_address = NULL; _SDR1 = 0; @@ -775,20 +782,26 @@ static void __init htab_initialize(void) * Clear the htab if firmware assisted dump is active so * that we dont end up using old mappings. */ - if (is_fadump_active() && ppc_md.hpte_clear_all) - ppc_md.hpte_clear_all(); + if (is_fadump_active() && mmu_hash_ops.hpte_clear_all) + mmu_hash_ops.hpte_clear_all(); #endif } else { - /* Find storage for the HPT. Must be contiguous in - * the absolute address space. On cell we want it to be - * in the first 2 Gig so we can use it for IOMMU hacks. 
+ unsigned long limit = MEMBLOCK_ALLOC_ANYWHERE; + +#ifdef CONFIG_PPC_CELL + /* + * Cell may require the hash table down low when using the + * Axon IOMMU in order to fit the dynamic region over it, see + * comments in cell/iommu.c */ - if (machine_is(cell)) + if (fdt_subnode_offset(initial_boot_params, 0, "axon") > 0) { limit = 0x80000000; - else - limit = MEMBLOCK_ALLOC_ANYWHERE; + pr_info("Hash table forced below 2G for Axon IOMMU\n"); + } +#endif /* CONFIG_PPC_CELL */ - table = memblock_alloc_base(htab_size_bytes, htab_size_bytes, limit); + table = memblock_alloc_base(htab_size_bytes, htab_size_bytes, + limit); DBG("Hash table allocated at %lx, size: %lx\n", table, htab_size_bytes); @@ -796,7 +809,7 @@ static void __init htab_initialize(void) htab_address = __va(table); /* htab absolute addr + encoded htabsize */ - _SDR1 = table + __ilog2(pteg_count) - 11; + _SDR1 = table + __ilog2(htab_size_bytes) - 18; /* Initialize the HPT with no entries */ memset((void *)table, 0, htab_size_bytes); @@ -805,7 +818,7 @@ static void __init htab_initialize(void) /* Set SDR1 */ mtspr(SPRN_SDR1, _SDR1); else - hash_init_partition_table(table, pteg_count); + hash_init_partition_table(table, htab_size_bytes); } prot = pgprot_val(PAGE_KERNEL); @@ -832,34 +845,6 @@ static void __init htab_initialize(void) DBG("creating mapping for region: %lx..%lx (prot: %lx)\n", base, size, prot); -#ifdef CONFIG_U3_DART - /* Do not map the DART space. Fortunately, it will be aligned - * in such a way that it will not cross two memblock regions and - * will fit within a single 16Mb page. - * The DART space is assumed to be a full 16Mb region even if - * we only use 2Mb of that space. We will use more of it later - * for AGP GART. We have to use a full 16Mb large page. - */ - DBG("DART base: %lx\n", dart_tablebase); - - if (dart_tablebase != 0 && dart_tablebase >= base - && dart_tablebase < (base + size)) { - unsigned long dart_table_end = dart_tablebase + 16 * MB; - if (base != dart_tablebase) - BUG_ON(htab_bolt_mapping(base, dart_tablebase, - __pa(base), prot, - mmu_linear_psize, - mmu_kernel_ssize)); - if ((base + size) > dart_table_end) - BUG_ON(htab_bolt_mapping(dart_tablebase+16*MB, - base + size, - __pa(dart_table_end), - prot, - mmu_linear_psize, - mmu_kernel_ssize)); - continue; - } -#endif /* CONFIG_U3_DART */ BUG_ON(htab_bolt_mapping(base, base + size, __pa(base), prot, mmu_linear_psize, mmu_kernel_ssize)); } @@ -890,8 +875,19 @@ static void __init htab_initialize(void) #undef KB #undef MB +void __init hash__early_init_devtree(void) +{ + /* Initialize segment sizes */ + of_scan_flat_dt(htab_dt_scan_seg_sizes, NULL); + + /* Initialize page sizes */ + htab_scan_page_sizes(); +} + void __init hash__early_init_mmu(void) { + htab_init_page_sizes(); + /* * initialize page table size */ @@ -926,12 +922,24 @@ void __init hash__early_init_mmu(void) pci_io_base = ISA_IO_BASE; #endif + /* Select appropriate backend */ + if (firmware_has_feature(FW_FEATURE_PS3_LV1)) + ps3_early_mm_init(); + else if (firmware_has_feature(FW_FEATURE_LPAR)) + hpte_init_pseries(); + else if (IS_ENABLED(CONFIG_PPC_NATIVE)) + hpte_init_native(); + + if (!mmu_hash_ops.hpte_insert) + panic("hash__early_init_mmu: No MMU hash ops defined!\n"); + /* Initialize the MMU Hash table and create the linear mapping * of memory. Has to be done before SLB initialization as this is * currently where the page size encoding is obtained. 
*/ htab_initialize(); + pr_info("Initializing hash mmu with SLB\n"); /* Initialize SLB management */ slb_initialize(); } @@ -1474,7 +1482,8 @@ void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize, * We use same base page size and actual psize, because we don't * use these functions for hugepage */ - ppc_md.hpte_invalidate(slot, vpn, psize, psize, ssize, local); + mmu_hash_ops.hpte_invalidate(slot, vpn, psize, psize, + ssize, local); } pte_iterate_hashed_end(); #ifdef CONFIG_PPC_TRANSACTIONAL_MEM @@ -1515,9 +1524,9 @@ void flush_hash_hugepage(unsigned long vsid, unsigned long addr, if (!hpte_slot_array) return; - if (ppc_md.hugepage_invalidate) { - ppc_md.hugepage_invalidate(vsid, s_addr, hpte_slot_array, - psize, ssize, local); + if (mmu_hash_ops.hugepage_invalidate) { + mmu_hash_ops.hugepage_invalidate(vsid, s_addr, hpte_slot_array, + psize, ssize, local); goto tm_abort; } /* @@ -1544,8 +1553,8 @@ void flush_hash_hugepage(unsigned long vsid, unsigned long addr, slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += hidx & _PTEIDX_GROUP_IX; - ppc_md.hpte_invalidate(slot, vpn, psize, - MMU_PAGE_16M, ssize, local); + mmu_hash_ops.hpte_invalidate(slot, vpn, psize, + MMU_PAGE_16M, ssize, local); } tm_abort: #ifdef CONFIG_PPC_TRANSACTIONAL_MEM @@ -1569,8 +1578,8 @@ tm_abort: void flush_hash_range(unsigned long number, int local) { - if (ppc_md.flush_hash_range) - ppc_md.flush_hash_range(number, local); + if (mmu_hash_ops.flush_hash_range) + mmu_hash_ops.flush_hash_range(number, local); else { int i; struct ppc64_tlb_batch *batch = @@ -1615,22 +1624,22 @@ repeat: HPTES_PER_GROUP) & ~0x7UL; /* Insert into the hash table, primary slot */ - slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, vflags, - psize, psize, ssize); + slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, vflags, + psize, psize, ssize); /* Primary is full, try the secondary */ if (unlikely(slot == -1)) { hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; - slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, - vflags | HPTE_V_SECONDARY, - psize, psize, ssize); + slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, + vflags | HPTE_V_SECONDARY, + psize, psize, ssize); if (slot == -1) { if (mftb() & 0x1) hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP)&~0x7UL; - ppc_md.hpte_remove(hpte_group); + mmu_hash_ops.hpte_remove(hpte_group); goto repeat; } } @@ -1680,8 +1689,9 @@ static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi) hash = ~hash; slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += hidx & _PTEIDX_GROUP_IX; - ppc_md.hpte_invalidate(slot, vpn, mmu_linear_psize, mmu_linear_psize, - mmu_kernel_ssize, 0); + mmu_hash_ops.hpte_invalidate(slot, vpn, mmu_linear_psize, + mmu_linear_psize, + mmu_kernel_ssize, 0); } void __kernel_map_pages(struct page *page, int numpages, int enable) diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c index ba3fc2294..f20d16f84 100644 --- a/arch/powerpc/mm/hugepage-hash64.c +++ b/arch/powerpc/mm/hugepage-hash64.c @@ -103,8 +103,8 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += hidx & _PTEIDX_GROUP_IX; - ret = ppc_md.hpte_updatepp(slot, rflags, vpn, - psize, lpsize, ssize, flags); + ret = mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, + psize, lpsize, ssize, flags); /* * We failed to update, try to insert a new entry. 
*/ @@ -131,23 +131,24 @@ repeat: hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; /* Insert into the hash table, primary slot */ - slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0, - psize, lpsize, ssize); + slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0, + psize, lpsize, ssize); /* * Primary is full, try the secondary */ if (unlikely(slot == -1)) { hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; - slot = ppc_md.hpte_insert(hpte_group, vpn, pa, - rflags, HPTE_V_SECONDARY, - psize, lpsize, ssize); + slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, + rflags, + HPTE_V_SECONDARY, + psize, lpsize, ssize); if (slot == -1) { if (mftb() & 0x1) hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; - ppc_md.hpte_remove(hpte_group); + mmu_hash_ops.hpte_remove(hpte_group); goto repeat; } } diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c index 3058560b6..d5026f380 100644 --- a/arch/powerpc/mm/hugetlbpage-hash64.c +++ b/arch/powerpc/mm/hugetlbpage-hash64.c @@ -79,8 +79,8 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT; - if (ppc_md.hpte_updatepp(slot, rflags, vpn, mmu_psize, - mmu_psize, ssize, flags) == -1) + if (mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, mmu_psize, + mmu_psize, ssize, flags) == -1) old_pte &= ~_PAGE_HPTEFLAGS; } diff --git a/arch/powerpc/mm/hugetlbpage-radix.c b/arch/powerpc/mm/hugetlbpage-radix.c index 1e11559e1..35254a678 100644 --- a/arch/powerpc/mm/hugetlbpage-radix.c +++ b/arch/powerpc/mm/hugetlbpage-radix.c @@ -5,39 +5,34 @@ #include #include #include +#include void radix__flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr) { - unsigned long ap, shift; + int psize; struct hstate *hstate = hstate_file(vma->vm_file); - shift = huge_page_shift(hstate); - if (shift == mmu_psize_defs[MMU_PAGE_2M].shift) - ap = mmu_get_ap(MMU_PAGE_2M); - else if (shift == mmu_psize_defs[MMU_PAGE_1G].shift) - ap = mmu_get_ap(MMU_PAGE_1G); - else { - WARN(1, "Wrong huge page shift\n"); - return ; - } - radix___flush_tlb_page(vma->vm_mm, vmaddr, ap, 0); + psize = hstate_get_psize(hstate); + radix__flush_tlb_page_psize(vma->vm_mm, vmaddr, psize); } void radix__local_flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr) { - unsigned long ap, shift; + int psize; struct hstate *hstate = hstate_file(vma->vm_file); - shift = huge_page_shift(hstate); - if (shift == mmu_psize_defs[MMU_PAGE_2M].shift) - ap = mmu_get_ap(MMU_PAGE_2M); - else if (shift == mmu_psize_defs[MMU_PAGE_1G].shift) - ap = mmu_get_ap(MMU_PAGE_1G); - else { - WARN(1, "Wrong huge page shift\n"); - return ; - } - radix___local_flush_tlb_page(vma->vm_mm, vmaddr, ap, 0); + psize = hstate_get_psize(hstate); + radix__local_flush_tlb_page_psize(vma->vm_mm, vmaddr, psize); +} + +void radix__flush_hugetlb_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + int psize; + struct hstate *hstate = hstate_file(vma->vm_file); + + psize = hstate_get_psize(hstate); + radix__flush_tlb_range_psize(vma->vm_mm, start, end, psize); } /* diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 119d18611..7372ee13e 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -81,6 +81,13 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, if (! 
new) return -ENOMEM; + /* + * Make sure other cpus find the hugepd set only after a + * properly initialized page table is visible to them. + * For more details look for comment in __pte_alloc(). + */ + smp_wmb(); + spin_lock(&mm->page_table_lock); #ifdef CONFIG_PPC_FSL_BOOK3E /* diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index c899fe340..448685fbf 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -64,7 +64,7 @@ EXPORT_SYMBOL(memstart_addr); phys_addr_t kernstart_addr; EXPORT_SYMBOL(kernstart_addr); -#ifdef CONFIG_RELOCATABLE_PPC32 +#ifdef CONFIG_RELOCATABLE /* Used in __va()/__pa() */ long long virt_phys_offset; EXPORT_SYMBOL(virt_phys_offset); @@ -80,9 +80,6 @@ EXPORT_SYMBOL(agp_special_page); void MMU_init(void); -/* XXX should be in current.h -- paulus */ -extern struct task_struct *current_set[NR_CPUS]; - /* * this tells the system to map all of ram with the segregs * (i.e. page tables) instead of the bats. diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 33709bdb0..16ada1eb7 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -411,3 +411,25 @@ struct page *realmode_pfn_to_page(unsigned long pfn) EXPORT_SYMBOL_GPL(realmode_pfn_to_page); #endif /* CONFIG_SPARSEMEM_VMEMMAP/CONFIG_FLATMEM */ + +#ifdef CONFIG_PPC_STD_MMU_64 +static bool disable_radix; +static int __init parse_disable_radix(char *p) +{ + disable_radix = true; + return 0; +} +early_param("disable_radix", parse_disable_radix); + +void __init mmu_early_init_devtree(void) +{ + /* Disable radix mode based on kernel command line. */ + if (disable_radix) + cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; + + if (early_radix_enabled()) + radix__early_init_devtree(); + else + hash__early_init_devtree(); +} +#endif /* CONFIG_PPC_STD_MMU_64 */ diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 2fd57fa48..5f844337d 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -116,6 +116,16 @@ int memory_add_physaddr_to_nid(u64 start) } #endif +int __weak create_section_mapping(unsigned long start, unsigned long end) +{ + return -ENODEV; +} + +int __weak remove_section_mapping(unsigned long start, unsigned long end) +{ + return -ENODEV; +} + int arch_add_memory(int nid, u64 start, u64 size, bool for_device) { struct pglist_data *pgdata; @@ -239,8 +249,14 @@ static int __init mark_nonram_nosave(void) static bool zone_limits_final; +/* + * The memory zones past TOP_ZONE are managed by generic mm code. + * These should be set to zero since that's what every other + * architecture does. + */ static unsigned long max_zone_pfns[MAX_NR_ZONES] = { - [0 ... MAX_NR_ZONES - 1] = ~0UL + [0 ... TOP_ZONE ] = ~0UL, + [TOP_ZONE + 1 ... 
MAX_NR_ZONES - 1] = 0 }; /* diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c index 196222227..b114f8b93 100644 --- a/arch/powerpc/mm/mmu_context_book3s64.c +++ b/arch/powerpc/mm/mmu_context_book3s64.c @@ -181,7 +181,10 @@ void destroy_context(struct mm_struct *mm) #ifdef CONFIG_PPC_RADIX_MMU void radix__switch_mmu_context(struct mm_struct *prev, struct mm_struct *next) { - mtspr(SPRN_PID, next->context.id); asm volatile("isync": : :"memory"); + mtspr(SPRN_PID, next->context.id); + asm volatile("isync \n" + PPC_SLBIA(0x7) + : : :"memory"); } #endif diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 6af65327c..f988db655 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -154,9 +154,10 @@ struct tlbcam { }; #endif -#if defined(CONFIG_6xx) || defined(CONFIG_FSL_BOOKE) +#if defined(CONFIG_6xx) || defined(CONFIG_FSL_BOOKE) || defined(CONFIG_PPC_8xx) /* 6xx have BATS */ /* FSL_BOOKE have TLBCAM */ +/* 8xx have LTLB */ phys_addr_t v_block_mapped(unsigned long va); unsigned long p_block_mapped(phys_addr_t pa); #else diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 669a15e7f..75b9cd615 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -581,30 +581,22 @@ static void verify_cpu_node_mapping(int cpu, int node) } } -static int cpu_numa_callback(struct notifier_block *nfb, unsigned long action, - void *hcpu) +/* Must run before sched domains notifier. */ +static int ppc_numa_cpu_prepare(unsigned int cpu) { - unsigned long lcpu = (unsigned long)hcpu; - int ret = NOTIFY_DONE, nid; + int nid; - switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - nid = numa_setup_cpu(lcpu); - verify_cpu_node_mapping((int)lcpu, nid); - ret = NOTIFY_OK; - break; + nid = numa_setup_cpu(cpu); + verify_cpu_node_mapping(cpu, nid); + return 0; +} + +static int ppc_numa_cpu_dead(unsigned int cpu) +{ #ifdef CONFIG_HOTPLUG_CPU - case CPU_DEAD: - case CPU_DEAD_FROZEN: - case CPU_UP_CANCELED: - case CPU_UP_CANCELED_FROZEN: - unmap_cpu_from_node(lcpu); - ret = NOTIFY_OK; - break; + unmap_cpu_from_node(cpu); #endif - } - return ret; + return 0; } /* @@ -913,11 +905,6 @@ static void __init dump_numa_memory_topology(void) } } -static struct notifier_block ppc64_numa_nb = { - .notifier_call = cpu_numa_callback, - .priority = 1 /* Must run before sched domains notifier. */ -}; - /* Initialize NODE_DATA for a node on the local memory */ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn) { @@ -985,15 +972,18 @@ void __init initmem_init(void) setup_node_to_cpumask_map(); reset_numa_cpu_lookup_table(); - register_cpu_notifier(&ppc64_numa_nb); + /* * We need the numa_cpu_lookup_table to be accurate for all CPUs, * even before we online them, so that we can use cpu_to_{node,mem} * early in boot, cf. smp_prepare_cpus(). + * _nocalls() + manual invocation is used because cpuhp is not yet + * initialized for the boot CPU. 
*/ - for_each_present_cpu(cpu) { - numa_setup_cpu((unsigned long)cpu); - } + cpuhp_setup_state_nocalls(CPUHP_POWER_NUMA_PREPARE, "POWER_NUMA_PREPARE", + ppc_numa_cpu_prepare, ppc_numa_cpu_dead); + for_each_present_cpu(cpu) + numa_setup_cpu(cpu); } static int __init early_numa(char *p) @@ -1163,18 +1153,34 @@ int hot_add_scn_to_nid(unsigned long scn_addr) static u64 hot_add_drconf_memory_max(void) { - struct device_node *memory = NULL; - unsigned int drconf_cell_cnt = 0; - u64 lmb_size = 0; + struct device_node *memory = NULL; + struct device_node *dn = NULL; + unsigned int drconf_cell_cnt = 0; + u64 lmb_size = 0; const __be32 *dm = NULL; + const __be64 *lrdr = NULL; + struct of_drconf_cell drmem; + + dn = of_find_node_by_path("/rtas"); + if (dn) { + lrdr = of_get_property(dn, "ibm,lrdr-capacity", NULL); + of_node_put(dn); + if (lrdr) + return be64_to_cpup(lrdr); + } - memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); - if (memory) { - drconf_cell_cnt = of_get_drconf_memory(memory, &dm); - lmb_size = of_get_lmb_size(memory); - of_node_put(memory); - } - return lmb_size * drconf_cell_cnt; + memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); + if (memory) { + drconf_cell_cnt = of_get_drconf_memory(memory, &dm); + lmb_size = of_get_lmb_size(memory); + + /* Advance to the last cell, each cell has 6 32 bit integers */ + dm += (drconf_cell_cnt - 1) * 6; + read_drconf_cell(&drmem, &dm); + of_node_put(memory); + return drmem.base_addr + lmb_size; + } + return 0; } /* diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c index 670318766..34079302c 100644 --- a/arch/powerpc/mm/pgtable-book3s64.c +++ b/arch/powerpc/mm/pgtable-book3s64.c @@ -14,6 +14,9 @@ #include "mmu_decl.h" #include +int (*register_process_table)(unsigned long base, unsigned long page_size, + unsigned long tbl_size); + #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* * This is called when relaxing access to a hugepage. It's also called in the page @@ -33,7 +36,7 @@ int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address, changed = !pmd_same(*(pmdp), entry); if (changed) { __ptep_set_access_flags(pmdp_ptep(pmdp), pmd_pte(entry)); - flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); + flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); } return changed; } @@ -66,7 +69,7 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0); - flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); + flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); /* * This ensures that generic code that rely on IRQ disabling * to prevent a parallel THP split work as expected. diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index 7931e1496..af897d91d 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -21,8 +21,11 @@ #include -static int native_update_partition_table(u64 patb1) +static int native_register_process_table(unsigned long base, unsigned long pg_sz, + unsigned long table_size) { + unsigned long patb1 = base | table_size | PATB_GR; + partition_tb->patb1 = cpu_to_be64(patb1); return 0; } @@ -168,7 +171,7 @@ redo: * of process table here. But our linear mapping also enable us to use * physical address here. 
*/ - ppc_md.update_partition_table(__pa(process_tb) | (PRTB_SIZE_SHIFT - 12) | PATB_GR); + register_process_table(__pa(process_tb), 0, PRTB_SIZE_SHIFT - 12); pr_info("Process table %p and radix root for kernel: %p\n", process_tb, init_mm.pgd); } @@ -182,7 +185,8 @@ static void __init radix_init_partition_table(void) partition_tb = early_alloc_pgtable(1UL << PATB_SIZE_SHIFT); partition_tb->patb0 = cpu_to_be64(rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE | PATB_HR); - printk("Partition table %p\n", partition_tb); + pr_info("Initializing Radix MMU\n"); + pr_info("Partition table %p\n", partition_tb); memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); /* @@ -194,7 +198,7 @@ static void __init radix_init_partition_table(void) void __init radix_init_native(void) { - ppc_md.update_partition_table = native_update_partition_table; + register_process_table = native_register_process_table; } static int __init get_idx_from_shift(unsigned int shift) @@ -260,7 +264,7 @@ static int __init radix_dt_scan_page_sizes(unsigned long node, return 1; } -static void __init radix_init_page_sizes(void) +void __init radix__early_init_devtree(void) { int rc; @@ -339,10 +343,10 @@ void __init radix__early_init_mmu(void) __pte_frag_nr = H_PTE_FRAG_NR; __pte_frag_size_shift = H_PTE_FRAG_SIZE_SHIFT; - radix_init_page_sizes(); if (!firmware_has_feature(FW_FEATURE_LPAR)) { + radix_init_native(); lpcr = mfspr(SPRN_LPCR); - mtspr(SPRN_LPCR, lpcr | LPCR_UPRT); + mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR); radix_init_partition_table(); } @@ -357,7 +361,7 @@ void radix__early_init_mmu_secondary(void) */ if (!firmware_has_feature(FW_FEATURE_LPAR)) { lpcr = mfspr(SPRN_LPCR); - mtspr(SPRN_LPCR, lpcr | LPCR_UPRT); + mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR); mtspr(SPRN_PTCR, __pa(partition_tb) | (PATB_SIZE_SHIFT - 12)); diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index 88a307504..0b6fb244d 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -225,7 +225,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, if (!is_vm_hugetlb_page(vma)) assert_pte_locked(vma->vm_mm, address); __ptep_set_access_flags(ptep, entry); - flush_tlb_page_nohash(vma, address); + flush_tlb_page(vma, address); } return changed; } diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 7f922f557..0ae0572bc 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -79,7 +79,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd) #endif } -__init_refok pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) +__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { pte_t *pte; diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index ab2f60e81..48df05ef5 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -34,8 +35,7 @@ static inline void __tlbiel_pid(unsigned long pid, int set, r = 1; /* raidx format */ asm volatile("ptesync": : :"memory"); - asm volatile(".long 0x7c000224 | (%0 << 11) | (%1 << 16) |" - "(%2 << 17) | (%3 << 18) | (%4 << 21)" + asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); asm volatile("ptesync": : :"memory"); } @@ -63,8 +63,7 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric) r = 1; /* raidx format */ asm volatile("ptesync": : :"memory"); - asm volatile(".long 0x7c000264 | (%0 << 11) | 
(%1 << 16) |" - "(%2 << 17) | (%3 << 18) | (%4 << 21)" + asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); asm volatile("eieio; tlbsync; ptesync": : :"memory"); } @@ -81,8 +80,7 @@ static inline void _tlbiel_va(unsigned long va, unsigned long pid, r = 1; /* raidx format */ asm volatile("ptesync": : :"memory"); - asm volatile(".long 0x7c000224 | (%0 << 11) | (%1 << 16) |" - "(%2 << 17) | (%3 << 18) | (%4 << 21)" + asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); asm volatile("ptesync": : :"memory"); } @@ -99,8 +97,7 @@ static inline void _tlbie_va(unsigned long va, unsigned long pid, r = 1; /* raidx format */ asm volatile("ptesync": : :"memory"); - asm volatile(".long 0x7c000264 | (%0 << 11) | (%1 << 16) |" - "(%2 << 17) | (%3 << 18) | (%4 << 21)" + asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); asm volatile("eieio; tlbsync; ptesync": : :"memory"); } @@ -143,10 +140,11 @@ void radix__local_flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr) } EXPORT_SYMBOL(radix__local_flush_tlb_pwc); -void radix___local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, - unsigned long ap, int nid) +void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, + int psize) { unsigned long pid; + unsigned long ap = mmu_get_ap(psize); preempt_disable(); pid = mm ? mm->context.id : 0; @@ -162,18 +160,12 @@ void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmadd if (vma && is_vm_hugetlb_page(vma)) return __local_flush_hugetlb_page(vma, vmaddr); #endif - radix___local_flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr, - mmu_get_ap(mmu_virtual_psize), 0); + radix__local_flush_tlb_page_psize(vma ? vma->vm_mm : NULL, vmaddr, + mmu_virtual_psize); } EXPORT_SYMBOL(radix__local_flush_tlb_page); #ifdef CONFIG_SMP -static int mm_is_core_local(struct mm_struct *mm) -{ - return cpumask_subset(mm_cpumask(mm), - topology_sibling_cpumask(smp_processor_id())); -} - void radix__flush_tlb_mm(struct mm_struct *mm) { unsigned long pid; @@ -224,10 +216,11 @@ no_context: } EXPORT_SYMBOL(radix__flush_tlb_pwc); -void radix___flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, - unsigned long ap, int nid) +void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, + int psize) { unsigned long pid; + unsigned long ap = mmu_get_ap(psize); preempt_disable(); pid = mm ? mm->context.id : 0; @@ -253,8 +246,8 @@ void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) if (vma && is_vm_hugetlb_page(vma)) return flush_hugetlb_page(vma, vmaddr); #endif - radix___flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr, - mmu_get_ap(mmu_virtual_psize), 0); + radix__flush_tlb_page_psize(vma ? 
vma->vm_mm : NULL, vmaddr, + mmu_virtual_psize); } EXPORT_SYMBOL(radix__flush_tlb_page); @@ -285,9 +278,125 @@ void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start, } EXPORT_SYMBOL(radix__flush_tlb_range); +static int radix_get_mmu_psize(int page_size) +{ + int psize; + + if (page_size == (1UL << mmu_psize_defs[mmu_virtual_psize].shift)) + psize = mmu_virtual_psize; + else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_2M].shift)) + psize = MMU_PAGE_2M; + else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_1G].shift)) + psize = MMU_PAGE_1G; + else + return -1; + return psize; +} void radix__tlb_flush(struct mmu_gather *tlb) { + int psize = 0; struct mm_struct *mm = tlb->mm; - radix__flush_tlb_mm(mm); + int page_size = tlb->page_size; + + psize = radix_get_mmu_psize(page_size); + /* + * if page size is not something we understand, do a full mm flush + */ + if (psize != -1 && !tlb->fullmm && !tlb->need_flush_all) + radix__flush_tlb_range_psize(mm, tlb->start, tlb->end, psize); + else + radix__flush_tlb_mm(mm); +} + +#define TLB_FLUSH_ALL -1UL +/* + * Number of pages above which we will do a bcast tlbie. Just a + * number at this point copied from x86 + */ +static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33; + +void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start, + unsigned long end, int psize) +{ + unsigned long pid; + unsigned long addr; + int local = mm_is_core_local(mm); + unsigned long ap = mmu_get_ap(psize); + int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); + unsigned long page_size = 1UL << mmu_psize_defs[psize].shift; + + + preempt_disable(); + pid = mm ? mm->context.id : 0; + if (unlikely(pid == MMU_NO_CONTEXT)) + goto err_out; + + if (end == TLB_FLUSH_ALL || + (end - start) > tlb_single_page_flush_ceiling * page_size) { + if (local) + _tlbiel_pid(pid, RIC_FLUSH_TLB); + else + _tlbie_pid(pid, RIC_FLUSH_TLB); + goto err_out; + } + for (addr = start; addr < end; addr += page_size) { + + if (local) + _tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB); + else { + if (lock_tlbie) + raw_spin_lock(&native_tlbie_lock); + _tlbie_va(addr, pid, ap, RIC_FLUSH_TLB); + if (lock_tlbie) + raw_spin_unlock(&native_tlbie_lock); + } + } +err_out: + preempt_enable(); +} + +void radix__flush_tlb_lpid_va(unsigned long lpid, unsigned long gpa, + unsigned long page_size) +{ + unsigned long rb,rs,prs,r; + unsigned long ap; + unsigned long ric = RIC_FLUSH_TLB; + + ap = mmu_get_ap(radix_get_mmu_psize(page_size)); + rb = gpa & ~(PPC_BITMASK(52, 63)); + rb |= ap << PPC_BITLSHIFT(58); + rs = lpid & ((1UL << 32) - 1); + prs = 0; /* process scoped */ + r = 1; /* raidx format */ + + asm volatile("ptesync": : :"memory"); + asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) + : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); + asm volatile("eieio; tlbsync; ptesync": : :"memory"); +} +EXPORT_SYMBOL(radix__flush_tlb_lpid_va); + +void radix__flush_tlb_lpid(unsigned long lpid) +{ + unsigned long rb,rs,prs,r; + unsigned long ric = RIC_FLUSH_ALL; + + rb = 0x2 << PPC_BITLSHIFT(53); /* IS = 2 */ + rs = lpid & ((1UL << 32) - 1); + prs = 0; /* partition scoped */ + r = 1; /* raidx format */ + + asm volatile("ptesync": : :"memory"); + asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) + : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); + asm volatile("eieio; tlbsync; ptesync": : :"memory"); +} +EXPORT_SYMBOL(radix__flush_tlb_lpid); + +void radix__flush_pmd_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ + 
radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_2M); } +EXPORT_SYMBOL(radix__flush_pmd_tlb_range); diff --git a/arch/powerpc/mm/tlb_hash32.c b/arch/powerpc/mm/tlb_hash32.c index 558e30cce..702d7689d 100644 --- a/arch/powerpc/mm/tlb_hash32.c +++ b/arch/powerpc/mm/tlb_hash32.c @@ -48,17 +48,6 @@ void flush_hash_entry(struct mm_struct *mm, pte_t *ptep, unsigned long addr) } EXPORT_SYMBOL(flush_hash_entry); -/* - * Called by ptep_set_access_flags, must flush on CPUs for which the - * DSI handler can't just "fixup" the TLB on a write fault - */ -void flush_tlb_page_nohash(struct vm_area_struct *vma, unsigned long addr) -{ - if (Hash != 0) - return; - _tlbie(addr); -} - /* * Called at the end of a mmu_gather operation to make sure the * TLB flush is completely done. diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index f46684885..050badc0e 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -215,12 +215,6 @@ EXPORT_SYMBOL(local_flush_tlb_page); static DEFINE_RAW_SPINLOCK(tlbivax_lock); -static int mm_is_core_local(struct mm_struct *mm) -{ - return cpumask_subset(mm_cpumask(mm), - topology_sibling_cpumask(smp_processor_id())); -} - struct tlb_flush_param { unsigned long addr; unsigned int pid; -- cgit v1.2.3-54-g00ecf
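
The dominant change across these files is mechanical: the hash-MMU hooks that used to hang off the generic machine descriptor (ppc_md.hpte_insert, ppc_md.hpte_updatepp, ppc_md.hpte_invalidate, ...) now live in a dedicated mmu_hash_ops table, and hash__early_init_mmu() selects a backend at boot (PS3, pseries or native) before panicking if no hpte_insert hook was registered. The fragment below is a minimal, self-contained userspace sketch of that ops-table pattern, not kernel code; every identifier in it (demo_hash_ops, demo_init_native, pick_backend) is invented for illustration.

/*
 * Standalone illustration of the ops-table pattern introduced by this
 * patch: backend-specific callbacks are collected in one structure that
 * an init routine fills in, and all callers dispatch through it.
 */
#include <stdio.h>
#include <stdlib.h>

struct demo_hash_ops {
	long (*hpte_insert)(unsigned long group, unsigned long vpn,
			    unsigned long pa, unsigned long flags);
	void (*hpte_invalidate)(long slot);
};

/* Single global ops table, analogous to mmu_hash_ops in the patch. */
static struct demo_hash_ops demo_hash_ops;

/* "Native" backend: in the kernel this would touch the hash table directly. */
static long native_insert(unsigned long group, unsigned long vpn,
			  unsigned long pa, unsigned long flags)
{
	printf("native insert: group=%lu vpn=%#lx pa=%#lx flags=%#lx\n",
	       group, vpn, pa, flags);
	return 0;	/* pretend slot 0 was used */
}

static void native_invalidate(long slot)
{
	printf("native invalidate: slot=%ld\n", slot);
}

/* Analogue of hpte_init_native(): register this backend's callbacks. */
static void demo_init_native(void)
{
	demo_hash_ops.hpte_insert     = native_insert;
	demo_hash_ops.hpte_invalidate = native_invalidate;
}

/*
 * Analogue of the backend selection added to hash__early_init_mmu():
 * pick exactly one implementation, then refuse to continue if the
 * mandatory hook is still unset.
 */
static void pick_backend(int have_hypervisor)
{
	if (!have_hypervisor)
		demo_init_native();
	/* a hypervisor-backed variant would be registered here instead */

	if (!demo_hash_ops.hpte_insert) {
		fprintf(stderr, "no hash MMU backend registered\n");
		exit(1);
	}
}

int main(void)
{
	pick_backend(0);
	long slot = demo_hash_ops.hpte_insert(3, 0x1000, 0x2000, 0x5);
	demo_hash_ops.hpte_invalidate(slot);
	return 0;
}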