diff options
author | André Fabian Silva Delgado <emulatorman@parabola.nu> | 2015-09-08 01:01:14 -0300 |
---|---|---|
committer | André Fabian Silva Delgado <emulatorman@parabola.nu> | 2015-09-08 01:01:14 -0300 |
commit | e5fd91f1ef340da553f7a79da9540c3db711c937 (patch) | |
tree | b11842027dc6641da63f4bcc524f8678263304a3 /arch/powerpc/mm | |
parent | 2a9b0348e685a63d97486f6749622b61e9e3292f (diff) |
Linux-libre 4.2-gnu
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r-- | arch/powerpc/mm/Makefile | 1 | ||||
-rw-r--r-- | arch/powerpc/mm/copro_fault.c | 9 | ||||
-rw-r--r-- | arch/powerpc/mm/fault.c | 13 | ||||
-rw-r--r-- | arch/powerpc/mm/hash_native_64.c | 2 | ||||
-rw-r--r-- | arch/powerpc/mm/hash_utils_64.c | 4 | ||||
-rw-r--r-- | arch/powerpc/mm/highmem.c | 4 | ||||
-rw-r--r-- | arch/powerpc/mm/hugetlbpage.c | 11 | ||||
-rw-r--r-- | arch/powerpc/mm/mem.c | 2 | ||||
-rw-r--r-- | arch/powerpc/mm/mmu_context_hash64.c | 6 | ||||
-rw-r--r-- | arch/powerpc/mm/mmu_context_iommu.c | 316 | ||||
-rw-r--r-- | arch/powerpc/mm/pgtable_64.c | 73 | ||||
-rw-r--r-- | arch/powerpc/mm/tlb_low_64e.S | 51 | ||||
-rw-r--r-- | arch/powerpc/mm/tlb_nohash.c | 2 |
13 files changed, 432 insertions, 62 deletions
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 9c8770b5f..3eb73a382 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -36,3 +36,4 @@ obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage-prot.o obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o obj-$(CONFIG_HIGHMEM) += highmem.o obj-$(CONFIG_PPC_COPRO_BASE) += copro_fault.o +obj-$(CONFIG_SPAPR_TCE_IOMMU) += mmu_context_iommu.o diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c index f031a47d7..6527882ce 100644 --- a/arch/powerpc/mm/copro_fault.c +++ b/arch/powerpc/mm/copro_fault.c @@ -26,7 +26,7 @@ #include <asm/reg.h> #include <asm/copro.h> #include <asm/spu.h> -#include <misc/cxl.h> +#include <misc/cxl-base.h> /* * This ought to be kept in sync with the powerpc specific do_page_fault @@ -100,7 +100,7 @@ EXPORT_SYMBOL_GPL(copro_handle_mm_fault); int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb) { - u64 vsid; + u64 vsid, vsidkey; int psize, ssize; switch (REGION_ID(ea)) { @@ -109,6 +109,7 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb) psize = get_slice_psize(mm, ea); ssize = user_segment_size(ea); vsid = get_vsid(mm->context.id, ea, ssize); + vsidkey = SLB_VSID_USER; break; case VMALLOC_REGION_ID: pr_devel("%s: 0x%llx -- VMALLOC_REGION_ID\n", __func__, ea); @@ -118,19 +119,21 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb) psize = mmu_io_psize; ssize = mmu_kernel_ssize; vsid = get_kernel_vsid(ea, mmu_kernel_ssize); + vsidkey = SLB_VSID_KERNEL; break; case KERNEL_REGION_ID: pr_devel("%s: 0x%llx -- KERNEL_REGION_ID\n", __func__, ea); psize = mmu_linear_psize; ssize = mmu_kernel_ssize; vsid = get_kernel_vsid(ea, mmu_kernel_ssize); + vsidkey = SLB_VSID_KERNEL; break; default: pr_debug("%s: invalid region access at %016llx\n", __func__, ea); return 1; } - vsid = (vsid << slb_vsid_shift(ssize)) | SLB_VSID_USER; + vsid = (vsid << slb_vsid_shift(ssize)) | vsidkey; vsid |= 
mmu_psize_defs[psize].sllp | ((ssize == MMU_SEGSIZE_1T) ? SLB_VSID_B_1T : 0); diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index b396868d2..a67c6d781 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -33,13 +33,13 @@ #include <linux/ratelimit.h> #include <linux/context_tracking.h> #include <linux/hugetlb.h> +#include <linux/uaccess.h> #include <asm/firmware.h> #include <asm/page.h> #include <asm/pgtable.h> #include <asm/mmu.h> #include <asm/mmu_context.h> -#include <asm/uaccess.h> #include <asm/tlbflush.h> #include <asm/siginfo.h> #include <asm/debug.h> @@ -272,15 +272,16 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, if (!arch_irq_disabled_regs(regs)) local_irq_enable(); - if (in_atomic() || mm == NULL) { + if (faulthandler_disabled() || mm == NULL) { if (!user_mode(regs)) { rc = SIGSEGV; goto bail; } - /* in_atomic() in user mode is really bad, + /* faulthandler_disabled() in user mode is really bad, as is current->mm == NULL. 
*/ printk(KERN_EMERG "Page fault in user mode with " - "in_atomic() = %d mm = %p\n", in_atomic(), mm); + "faulthandler_disabled() = %d mm = %p\n", + faulthandler_disabled(), mm); printk(KERN_EMERG "NIP = %lx MSR = %lx\n", regs->nip, regs->msr); die("Weird page fault", regs, SIGSEGV); @@ -528,6 +529,10 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) printk(KERN_ALERT "Unable to handle kernel paging request for " "instruction fetch\n"); break; + case 0x600: + printk(KERN_ALERT "Unable to handle kernel paging request for " + "unaligned access at address 0x%08lx\n", regs->dar); + break; default: printk(KERN_ALERT "Unable to handle kernel paging request for " "unknown fault\n"); diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 9c4880dde..13befa35d 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -29,7 +29,7 @@ #include <asm/kexec.h> #include <asm/ppc-opcode.h> -#include <misc/cxl.h> +#include <misc/cxl-base.h> #ifdef DEBUG_LOW #define DBG_LOW(fmt...) udbg_printf(fmt) diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index fda236f90..5ec987f65 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -57,6 +57,7 @@ #include <asm/fadump.h> #include <asm/firmware.h> #include <asm/tm.h> +#include <asm/trace.h> #ifdef DEBUG #define DBG(fmt...) 
udbg_printf(fmt) @@ -1004,6 +1005,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx\n", ea, access, trap); + trace_hash_fault(ea, access, trap); /* Get region & vsid */ switch (REGION_ID(ea)) { @@ -1475,7 +1477,7 @@ static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi) unsigned long hash; unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize); unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize); - unsigned long mode = htab_convert_pte_flags(PAGE_KERNEL); + unsigned long mode = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL)); long ret; hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize); diff --git a/arch/powerpc/mm/highmem.c b/arch/powerpc/mm/highmem.c index e7450bdbe..e292c8a60 100644 --- a/arch/powerpc/mm/highmem.c +++ b/arch/powerpc/mm/highmem.c @@ -34,7 +34,7 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot) unsigned long vaddr; int idx, type; - /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ + preempt_disable(); pagefault_disable(); if (!PageHighMem(page)) return page_address(page); @@ -59,6 +59,7 @@ void __kunmap_atomic(void *kvaddr) if (vaddr < __fix_to_virt(FIX_KMAP_END)) { pagefault_enable(); + preempt_enable(); return; } @@ -82,5 +83,6 @@ void __kunmap_atomic(void *kvaddr) kmap_atomic_idx_pop(); pagefault_enable(); + preempt_enable(); } EXPORT_SYMBOL(__kunmap_atomic); diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 3385e3d05..bb0bd7025 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -336,7 +336,7 @@ int alloc_bootmem_huge_page(struct hstate *hstate) unsigned long gpage_npages[MMU_PAGE_COUNT]; static int __init do_gpage_early_setup(char *param, char *val, - const char *unused) + const char *unused, void *arg) { static phys_addr_t size; unsigned long npages; @@ -385,7 +385,7 @@ void __init reserve_hugetlb_gpages(void) strlcpy(cmdline, boot_command_line, 
COMMAND_LINE_SIZE); parse_args("hugetlb gpages", cmdline, NULL, 0, 0, 0, - &do_gpage_early_setup); + NULL, &do_gpage_early_setup); /* * Walk gpage list in reverse, allocating larger page sizes first. @@ -439,11 +439,6 @@ int alloc_bootmem_huge_page(struct hstate *hstate) } #endif -int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) -{ - return 0; -} - #ifdef CONFIG_PPC_FSL_BOOK3E #define HUGEPD_FREELIST_SIZE \ ((PAGE_SIZE - sizeof(struct hugepd_freelist)) / sizeof(pte_t)) @@ -933,7 +928,7 @@ static int __init hugetlbpage_init(void) return 0; } #endif -module_init(hugetlbpage_init); +arch_initcall(hugetlbpage_init); void flush_dcache_icache_hugepage(struct page *page) { diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 45fda71fe..0f11819d8 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -560,7 +560,7 @@ subsys_initcall(add_system_ram_resources); */ int devmem_is_allowed(unsigned long pfn) { - if (iomem_is_exclusive(pfn << PAGE_SHIFT)) + if (iomem_is_exclusive(PFN_PHYS(pfn))) return 0; if (!page_is_ram(pfn)) return 1; diff --git a/arch/powerpc/mm/mmu_context_hash64.c b/arch/powerpc/mm/mmu_context_hash64.c index 178876aef..4e4efbc26 100644 --- a/arch/powerpc/mm/mmu_context_hash64.c +++ b/arch/powerpc/mm/mmu_context_hash64.c @@ -89,6 +89,9 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) #ifdef CONFIG_PPC_64K_PAGES mm->context.pte_frag = NULL; #endif +#ifdef CONFIG_SPAPR_TCE_IOMMU + mm_iommu_init(&mm->context); +#endif return 0; } @@ -132,6 +135,9 @@ static inline void destroy_pagetable_page(struct mm_struct *mm) void destroy_context(struct mm_struct *mm) { +#ifdef CONFIG_SPAPR_TCE_IOMMU + mm_iommu_cleanup(&mm->context); +#endif #ifdef CONFIG_PPC_ICSWX drop_cop(mm->context.acop, mm); diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c new file mode 100644 index 000000000..da6a2168a --- /dev/null +++ b/arch/powerpc/mm/mmu_context_iommu.c @@ -0,0 +1,316 @@ 
+/* + * IOMMU helpers in MMU context. + * + * Copyright (C) 2015 IBM Corp. <aik@ozlabs.ru> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/rculist.h> +#include <linux/vmalloc.h> +#include <linux/mutex.h> +#include <asm/mmu_context.h> + +static DEFINE_MUTEX(mem_list_mutex); + +struct mm_iommu_table_group_mem_t { + struct list_head next; + struct rcu_head rcu; + unsigned long used; + atomic64_t mapped; + u64 ua; /* userspace address */ + u64 entries; /* number of entries in hpas[] */ + u64 *hpas; /* vmalloc'ed */ +}; + +static long mm_iommu_adjust_locked_vm(struct mm_struct *mm, + unsigned long npages, bool incr) +{ + long ret = 0, locked, lock_limit; + + if (!npages) + return 0; + + down_write(&mm->mmap_sem); + + if (incr) { + locked = mm->locked_vm + npages; + lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; + if (locked > lock_limit && !capable(CAP_IPC_LOCK)) + ret = -ENOMEM; + else + mm->locked_vm += npages; + } else { + if (WARN_ON_ONCE(npages > mm->locked_vm)) + npages = mm->locked_vm; + mm->locked_vm -= npages; + } + + pr_debug("[%d] RLIMIT_MEMLOCK HASH64 %c%ld %ld/%ld\n", + current->pid, + incr ? 
'+' : '-', + npages << PAGE_SHIFT, + mm->locked_vm << PAGE_SHIFT, + rlimit(RLIMIT_MEMLOCK)); + up_write(&mm->mmap_sem); + + return ret; +} + +bool mm_iommu_preregistered(void) +{ + if (!current || !current->mm) + return false; + + return !list_empty(&current->mm->context.iommu_group_mem_list); +} +EXPORT_SYMBOL_GPL(mm_iommu_preregistered); + +long mm_iommu_get(unsigned long ua, unsigned long entries, + struct mm_iommu_table_group_mem_t **pmem) +{ + struct mm_iommu_table_group_mem_t *mem; + long i, j, ret = 0, locked_entries = 0; + struct page *page = NULL; + + if (!current || !current->mm) + return -ESRCH; /* process exited */ + + mutex_lock(&mem_list_mutex); + + list_for_each_entry_rcu(mem, &current->mm->context.iommu_group_mem_list, + next) { + if ((mem->ua == ua) && (mem->entries == entries)) { + ++mem->used; + *pmem = mem; + goto unlock_exit; + } + + /* Overlap? */ + if ((mem->ua < (ua + (entries << PAGE_SHIFT))) && + (ua < (mem->ua + + (mem->entries << PAGE_SHIFT)))) { + ret = -EINVAL; + goto unlock_exit; + } + + } + + ret = mm_iommu_adjust_locked_vm(current->mm, entries, true); + if (ret) + goto unlock_exit; + + locked_entries = entries; + + mem = kzalloc(sizeof(*mem), GFP_KERNEL); + if (!mem) { + ret = -ENOMEM; + goto unlock_exit; + } + + mem->hpas = vzalloc(entries * sizeof(mem->hpas[0])); + if (!mem->hpas) { + kfree(mem); + ret = -ENOMEM; + goto unlock_exit; + } + + for (i = 0; i < entries; ++i) { + if (1 != get_user_pages_fast(ua + (i << PAGE_SHIFT), + 1/* pages */, 1/* iswrite */, &page)) { + for (j = 0; j < i; ++j) + put_page(pfn_to_page( + mem->hpas[j] >> PAGE_SHIFT)); + vfree(mem->hpas); + kfree(mem); + ret = -EFAULT; + goto unlock_exit; + } + + mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT; + } + + atomic64_set(&mem->mapped, 1); + mem->used = 1; + mem->ua = ua; + mem->entries = entries; + *pmem = mem; + + list_add_rcu(&mem->next, &current->mm->context.iommu_group_mem_list); + +unlock_exit: + if (locked_entries && ret) + mm_iommu_adjust_locked_vm(current->mm, 
locked_entries, false); + + mutex_unlock(&mem_list_mutex); + + return ret; +} +EXPORT_SYMBOL_GPL(mm_iommu_get); + +static void mm_iommu_unpin(struct mm_iommu_table_group_mem_t *mem) +{ + long i; + struct page *page = NULL; + + for (i = 0; i < mem->entries; ++i) { + if (!mem->hpas[i]) + continue; + + page = pfn_to_page(mem->hpas[i] >> PAGE_SHIFT); + if (!page) + continue; + + put_page(page); + mem->hpas[i] = 0; + } +} + +static void mm_iommu_do_free(struct mm_iommu_table_group_mem_t *mem) +{ + + mm_iommu_unpin(mem); + vfree(mem->hpas); + kfree(mem); +} + +static void mm_iommu_free(struct rcu_head *head) +{ + struct mm_iommu_table_group_mem_t *mem = container_of(head, + struct mm_iommu_table_group_mem_t, rcu); + + mm_iommu_do_free(mem); +} + +static void mm_iommu_release(struct mm_iommu_table_group_mem_t *mem) +{ + list_del_rcu(&mem->next); + mm_iommu_adjust_locked_vm(current->mm, mem->entries, false); + call_rcu(&mem->rcu, mm_iommu_free); +} + +long mm_iommu_put(struct mm_iommu_table_group_mem_t *mem) +{ + long ret = 0; + + if (!current || !current->mm) + return -ESRCH; /* process exited */ + + mutex_lock(&mem_list_mutex); + + if (mem->used == 0) { + ret = -ENOENT; + goto unlock_exit; + } + + --mem->used; + /* There are still users, exit */ + if (mem->used) + goto unlock_exit; + + /* Are there still mappings? 
*/ + if (atomic_cmpxchg(&mem->mapped, 1, 0) != 1) { + ++mem->used; + ret = -EBUSY; + goto unlock_exit; + } + + /* @mapped became 0 so now mappings are disabled, release the region */ + mm_iommu_release(mem); + +unlock_exit: + mutex_unlock(&mem_list_mutex); + + return ret; +} +EXPORT_SYMBOL_GPL(mm_iommu_put); + +struct mm_iommu_table_group_mem_t *mm_iommu_lookup(unsigned long ua, + unsigned long size) +{ + struct mm_iommu_table_group_mem_t *mem, *ret = NULL; + + list_for_each_entry_rcu(mem, + &current->mm->context.iommu_group_mem_list, + next) { + if ((mem->ua <= ua) && + (ua + size <= mem->ua + + (mem->entries << PAGE_SHIFT))) { + ret = mem; + break; + } + } + + return ret; +} +EXPORT_SYMBOL_GPL(mm_iommu_lookup); + +struct mm_iommu_table_group_mem_t *mm_iommu_find(unsigned long ua, + unsigned long entries) +{ + struct mm_iommu_table_group_mem_t *mem, *ret = NULL; + + list_for_each_entry_rcu(mem, + &current->mm->context.iommu_group_mem_list, + next) { + if ((mem->ua == ua) && (mem->entries == entries)) { + ret = mem; + break; + } + } + + return ret; +} +EXPORT_SYMBOL_GPL(mm_iommu_find); + +long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem, + unsigned long ua, unsigned long *hpa) +{ + const long entry = (ua - mem->ua) >> PAGE_SHIFT; + u64 *va = &mem->hpas[entry]; + + if (entry >= mem->entries) + return -EFAULT; + + *hpa = *va | (ua & ~PAGE_MASK); + + return 0; +} +EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa); + +long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem) +{ + if (atomic64_inc_not_zero(&mem->mapped)) + return 0; + + /* Last mm_iommu_put() has been called, no more mappings allowed() */ + return -ENXIO; +} +EXPORT_SYMBOL_GPL(mm_iommu_mapped_inc); + +void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem) +{ + atomic64_add_unless(&mem->mapped, -1, 1); +} +EXPORT_SYMBOL_GPL(mm_iommu_mapped_dec); + +void mm_iommu_init(mm_context_t *ctx) +{ + INIT_LIST_HEAD_RCU(&ctx->iommu_group_mem_list); +} + +void mm_iommu_cleanup(mm_context_t *ctx) +{ + struct 
mm_iommu_table_group_mem_t *mem, *tmp; + + list_for_each_entry_safe(mem, tmp, &ctx->iommu_group_mem_list, next) { + list_del_rcu(&mem->next); + mm_iommu_do_free(mem); + } +} diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 6bfadf1aa..876232d64 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -554,47 +554,42 @@ unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr, return old; } -pmd_t pmdp_clear_flush(struct vm_area_struct *vma, unsigned long address, - pmd_t *pmdp) +pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address, + pmd_t *pmdp) { pmd_t pmd; VM_BUG_ON(address & ~HPAGE_PMD_MASK); - if (pmd_trans_huge(*pmdp)) { - pmd = pmdp_get_and_clear(vma->vm_mm, address, pmdp); - } else { - /* - * khugepaged calls this for normal pmd - */ - pmd = *pmdp; - pmd_clear(pmdp); - /* - * Wait for all pending hash_page to finish. This is needed - * in case of subpage collapse. When we collapse normal pages - * to hugepage, we first clear the pmd, then invalidate all - * the PTE entries. The assumption here is that any low level - * page fault will see a none pmd and take the slow path that - * will wait on mmap_sem. But we could very well be in a - * hash_page with local ptep pointer value. Such a hash page - * can result in adding new HPTE entries for normal subpages. - * That means we could be modifying the page content as we - * copy them to a huge page. So wait for parallel hash_page - * to finish before invalidating HPTE entries. We can do this - * by sending an IPI to all the cpus and executing a dummy - * function there. - */ - kick_all_cpus_sync(); - /* - * Now invalidate the hpte entries in the range - * covered by pmd. This make sure we take a - * fault and will find the pmd as none, which will - * result in a major fault which takes mmap_sem and - * hence wait for collapse to complete. 
Without this - * the __collapse_huge_page_copy can result in copying - * the old content. - */ - flush_tlb_pmd_range(vma->vm_mm, &pmd, address); - } + VM_BUG_ON(pmd_trans_huge(*pmdp)); + + pmd = *pmdp; + pmd_clear(pmdp); + /* + * Wait for all pending hash_page to finish. This is needed + * in case of subpage collapse. When we collapse normal pages + * to hugepage, we first clear the pmd, then invalidate all + * the PTE entries. The assumption here is that any low level + * page fault will see a none pmd and take the slow path that + * will wait on mmap_sem. But we could very well be in a + * hash_page with local ptep pointer value. Such a hash page + * can result in adding new HPTE entries for normal subpages. + * That means we could be modifying the page content as we + * copy them to a huge page. So wait for parallel hash_page + * to finish before invalidating HPTE entries. We can do this + * by sending an IPI to all the cpus and executing a dummy + * function there. + */ + kick_all_cpus_sync(); + /* + * Now invalidate the hpte entries in the range + * covered by pmd. This make sure we take a + * fault and will find the pmd as none, which will + * result in a major fault which takes mmap_sem and + * hence wait for collapse to complete. Without this + * the __collapse_huge_page_copy can result in copying + * the old content. 
+ */ + flush_tlb_pmd_range(vma->vm_mm, &pmd, address); return pmd; } @@ -817,8 +812,8 @@ void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, return; } -pmd_t pmdp_get_and_clear(struct mm_struct *mm, - unsigned long addr, pmd_t *pmdp) +pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, + unsigned long addr, pmd_t *pmdp) { pmd_t old_pmd; pgtable_t pgtable; diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S index 89bf95bd6..765b41988 100644 --- a/arch/powerpc/mm/tlb_low_64e.S +++ b/arch/powerpc/mm/tlb_low_64e.S @@ -398,18 +398,18 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_SMT) rldicl r15,r16,64-PUD_SHIFT+3,64-PUD_INDEX_SIZE-3 clrrdi r15,r15,3 cmpdi cr0,r14,0 - bge tlb_miss_fault_e6500 /* Bad pgd entry or hugepage; bail */ + bge tlb_miss_huge_e6500 /* Bad pgd entry or hugepage; bail */ ldx r14,r14,r15 /* grab pud entry */ rldicl r15,r16,64-PMD_SHIFT+3,64-PMD_INDEX_SIZE-3 clrrdi r15,r15,3 cmpdi cr0,r14,0 - bge tlb_miss_fault_e6500 + bge tlb_miss_huge_e6500 ldx r14,r14,r15 /* Grab pmd entry */ mfspr r10,SPRN_MAS0 cmpdi cr0,r14,0 - bge tlb_miss_fault_e6500 + bge tlb_miss_huge_e6500 /* Now we build the MAS for a 2M indirect page: * @@ -428,6 +428,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_SMT) clrrdi r15,r16,21 /* make EA 2M-aligned */ mtspr SPRN_MAS2,r15 +tlb_miss_huge_done_e6500: lbz r15,TCD_ESEL_NEXT(r11) lbz r16,TCD_ESEL_MAX(r11) lbz r14,TCD_ESEL_FIRST(r11) @@ -456,6 +457,50 @@ END_FTR_SECTION_IFSET(CPU_FTR_SMT) tlb_epilog_bolted rfi +tlb_miss_huge_e6500: + beq tlb_miss_fault_e6500 + li r10,1 + andi. r15,r14,HUGEPD_SHIFT_MASK@l /* r15 = psize */ + rldimi r14,r10,63,0 /* Set PD_HUGE */ + xor r14,r14,r15 /* Clear size bits */ + ldx r14,0,r14 + + /* + * Now we build the MAS for a huge page. 
+ * + * MAS 0 : ESEL needs to be filled by software round-robin + * - can be handled by indirect code + * MAS 1 : Need to clear IND and set TSIZE + * MAS 2,3+7: Needs to be redone similar to non-tablewalk handler + */ + + subi r15,r15,10 /* Convert psize to tsize */ + mfspr r10,SPRN_MAS1 + rlwinm r10,r10,0,~MAS1_IND + rlwimi r10,r15,MAS1_TSIZE_SHIFT,MAS1_TSIZE_MASK + mtspr SPRN_MAS1,r10 + + li r10,-0x400 + sld r15,r10,r15 /* Generate mask based on size */ + and r10,r16,r15 + rldicr r15,r14,64-(PTE_RPN_SHIFT-PAGE_SHIFT),63-PAGE_SHIFT + rlwimi r10,r14,32-19,27,31 /* Insert WIMGE */ + clrldi r15,r15,PAGE_SHIFT /* Clear crap at the top */ + rlwimi r15,r14,32-8,22,25 /* Move in U bits */ + mtspr SPRN_MAS2,r10 + andi. r10,r14,_PAGE_DIRTY + rlwimi r15,r14,32-2,26,31 /* Move in BAP bits */ + + /* Mask out SW and UW if !DIRTY (XXX optimize this !) */ + bne 1f + li r10,MAS3_SW|MAS3_UW + andc r15,r15,r10 +1: + mtspr SPRN_MAS7_MAS3,r15 + + mfspr r10,SPRN_MAS0 + b tlb_miss_huge_done_e6500 + tlb_miss_kernel_e6500: ld r14,PACA_KERNELPGD(r13) cmpldi cr1,r15,8 /* Check for vmalloc region */ diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index cbd3d0698..723a099f6 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -217,7 +217,7 @@ static DEFINE_RAW_SPINLOCK(tlbivax_lock); static int mm_is_core_local(struct mm_struct *mm) { return cpumask_subset(mm_cpumask(mm), - topology_thread_cpumask(smp_processor_id())); + topology_sibling_cpumask(smp_processor_id())); } struct tlb_flush_param { |