diff options
Diffstat (limited to 'mm/memory.c')
-rw-r--r-- | mm/memory.c | 170 |
1 files changed, 97 insertions, 73 deletions
diff --git a/mm/memory.c b/mm/memory.c index 869aa2cb2..dc696bcf0 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -65,6 +65,7 @@ #include <linux/userfaultfd_k.h> #include <asm/io.h> +#include <asm/mmu_context.h> #include <asm/pgalloc.h> #include <asm/uaccess.h> #include <asm/tlb.h> @@ -562,8 +563,7 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma, } } -int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, - pmd_t *pmd, unsigned long address) +int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address) { spinlock_t *ptl; pgtable_t new = pte_alloc_one(mm, address); @@ -661,9 +661,8 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr, return; } if (nr_unshown) { - printk(KERN_ALERT - "BUG: Bad page map: %lu messages suppressed\n", - nr_unshown); + pr_alert("BUG: Bad page map: %lu messages suppressed\n", + nr_unshown); nr_unshown = 0; } nr_shown = 0; @@ -674,15 +673,13 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr, mapping = vma->vm_file ? vma->vm_file->f_mapping : NULL; index = linear_page_index(vma, addr); - printk(KERN_ALERT - "BUG: Bad page map in process %s pte:%08llx pmd:%08llx\n", - current->comm, - (long long)pte_val(pte), (long long)pmd_val(*pmd)); + pr_alert("BUG: Bad page map in process %s pte:%08llx pmd:%08llx\n", + current->comm, + (long long)pte_val(pte), (long long)pmd_val(*pmd)); if (page) dump_page(page, "bad pte"); - printk(KERN_ALERT - "addr:%p vm_flags:%08lx anon_vma:%p mapping:%p index:%lx\n", - (void *)addr, vma->vm_flags, vma->anon_vma, mapping, index); + pr_alert("addr:%p vm_flags:%08lx anon_vma:%p mapping:%p index:%lx\n", + (void *)addr, vma->vm_flags, vma->anon_vma, mapping, index); /* * Choose text because data symbols depend on CONFIG_KALLSYMS_ALL=y */ @@ -1145,6 +1142,12 @@ again: if (!PageAnon(page)) { if (pte_dirty(ptent)) { + /* + * oom_reaper cannot tear down dirty + * pages + */ + if (unlikely(details && details->ignore_dirty)) + continue; force_flush = 1; set_page_dirty(page); } @@ -1163,8 +1166,8 @@ again: } continue; } - /* If details->check_mapping, we leave swap entries. */ - if (unlikely(details)) + /* only check swap_entries if explicitly asked for in details */ + if (unlikely(details && !details->check_swap_entries)) continue; entry = pte_to_swp_entry(ptent); @@ -1219,15 +1222,8 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb, next = pmd_addr_end(addr, end); if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) { if (next - addr != HPAGE_PMD_SIZE) { -#ifdef CONFIG_DEBUG_VM - if (!rwsem_is_locked(&tlb->mm->mmap_sem)) { - pr_err("%s: mmap_sem is unlocked! addr=0x%lx end=0x%lx vma->vm_start=0x%lx vma->vm_end=0x%lx\n", - __func__, addr, end, - vma->vm_start, - vma->vm_end); - BUG(); - } -#endif + VM_BUG_ON_VMA(vma_is_anonymous(vma) && + !rwsem_is_locked(&tlb->mm->mmap_sem), vma); split_huge_pmd(vma, pmd, addr); } else if (zap_huge_pmd(tlb, vma, pmd, addr)) goto next; @@ -1269,7 +1265,7 @@ static inline unsigned long zap_pud_range(struct mmu_gather *tlb, return addr; } -static void unmap_page_range(struct mmu_gather *tlb, +void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long addr, unsigned long end, struct zap_details *details) @@ -1277,9 +1273,6 @@ static void unmap_page_range(struct mmu_gather *tlb, pgd_t *pgd; unsigned long next; - if (details && !details->check_mapping) - details = NULL; - BUG_ON(addr >= end); tlb_start_vma(tlb, vma); pgd = pgd_offset(vma->vm_mm, addr); @@ -1591,8 +1584,29 @@ out: int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn) { + return vm_insert_pfn_prot(vma, addr, pfn, vma->vm_page_prot); +} +EXPORT_SYMBOL(vm_insert_pfn); + +/** + * vm_insert_pfn_prot - insert single pfn into user vma with specified pgprot + * @vma: user vma to map to + * @addr: target user address of this page + * @pfn: source kernel pfn + * @pgprot: pgprot flags for the inserted page + * + * This is exactly like vm_insert_pfn, except that it allows drivers to + * to override pgprot on a per-page basis. + * + * This only makes sense for IO mappings, and it makes no sense for + * cow mappings. In general, using multiple vmas is preferable; + * vm_insert_pfn_prot should only be used if using multiple VMAs is + * impractical. + */ +int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, + unsigned long pfn, pgprot_t pgprot) +{ int ret; - pgprot_t pgprot = vma->vm_page_prot; /* * Technically, architectures with pte_special can avoid all these * restrictions (same for remap_pfn_range). However we would like @@ -1614,7 +1628,7 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr, return ret; } -EXPORT_SYMBOL(vm_insert_pfn); +EXPORT_SYMBOL(vm_insert_pfn_prot); int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, pfn_t pfn) @@ -1916,7 +1930,9 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr, unsigned long end = addr + size; int err; - BUG_ON(addr >= end); + if (WARN_ON(addr >= end)) + return -EINVAL; + pgd = pgd_offset(mm, addr); do { next = pgd_addr_end(addr, end); @@ -2071,7 +2087,7 @@ static inline int wp_page_reuse(struct mm_struct *mm, VM_BUG_ON_PAGE(PageAnon(page), page); mapping = page->mapping; unlock_page(page); - page_cache_release(page); + put_page(page); if ((dirtied || page_mkwrite) && mapping) { /* @@ -2205,7 +2221,7 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma, } if (new_page) - page_cache_release(new_page); + put_page(new_page); pte_unmap_unlock(page_table, ptl); mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); @@ -2220,14 +2236,14 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma, munlock_vma_page(old_page); unlock_page(old_page); } - page_cache_release(old_page); + put_page(old_page); } return page_copied ? VM_FAULT_WRITE : 0; oom_free_new: - page_cache_release(new_page); + put_page(new_page); oom: if (old_page) - page_cache_release(old_page); + put_page(old_page); return VM_FAULT_OOM; } @@ -2275,7 +2291,7 @@ static int wp_page_shared(struct mm_struct *mm, struct vm_area_struct *vma, { int page_mkwrite = 0; - page_cache_get(old_page); + get_page(old_page); if (vma->vm_ops && vma->vm_ops->page_mkwrite) { int tmp; @@ -2284,7 +2300,7 @@ static int wp_page_shared(struct mm_struct *mm, struct vm_area_struct *vma, tmp = do_page_mkwrite(vma, old_page, address); if (unlikely(!tmp || (tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) { - page_cache_release(old_page); + put_page(old_page); return tmp; } /* @@ -2298,7 +2314,7 @@ static int wp_page_shared(struct mm_struct *mm, struct vm_area_struct *vma, if (!pte_same(*page_table, orig_pte)) { unlock_page(old_page); pte_unmap_unlock(page_table, ptl); - page_cache_release(old_page); + put_page(old_page); return 0; } page_mkwrite = 1; @@ -2357,8 +2373,9 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, * not dirty accountable. */ if (PageAnon(old_page) && !PageKsm(old_page)) { + int total_mapcount; if (!trylock_page(old_page)) { - page_cache_get(old_page); + get_page(old_page); pte_unmap_unlock(page_table, ptl); lock_page(old_page); page_table = pte_offset_map_lock(mm, pmd, address, @@ -2366,18 +2383,23 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, if (!pte_same(*page_table, orig_pte)) { unlock_page(old_page); pte_unmap_unlock(page_table, ptl); - page_cache_release(old_page); + put_page(old_page); return 0; } - page_cache_release(old_page); + put_page(old_page); } - if (reuse_swap_page(old_page)) { - /* - * The page is all ours. Move it to our anon_vma so - * the rmap code will not search our parent or siblings. - * Protected against the rmap code by the page lock. - */ - page_move_anon_rmap(old_page, vma, address); + if (reuse_swap_page(old_page, &total_mapcount)) { + if (total_mapcount == 1) { + /* + * The page is all ours. Move it to + * our anon_vma so the rmap code will + * not search our parent or siblings. + * Protected against the rmap code by + * the page lock. + */ + page_move_anon_rmap(compound_head(old_page), + vma, address); + } unlock_page(old_page); return wp_page_reuse(mm, vma, address, page_table, ptl, orig_pte, old_page, 0, 0); @@ -2392,7 +2414,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, /* * Ok, we need to copy. Oh, well.. */ - page_cache_get(old_page); + get_page(old_page); pte_unmap_unlock(page_table, ptl); return wp_page_copy(mm, vma, address, page_table, pmd, @@ -2417,7 +2439,6 @@ static inline void unmap_mapping_range_tree(struct rb_root *root, vba = vma->vm_pgoff; vea = vba + vma_pages(vma) - 1; - /* Assume for now that PAGE_CACHE_SHIFT == PAGE_SHIFT */ zba = details->first_index; if (zba < vba) zba = vba; @@ -2452,7 +2473,7 @@ static inline void unmap_mapping_range_tree(struct rb_root *root, void unmap_mapping_range(struct address_space *mapping, loff_t const holebegin, loff_t const holelen, int even_cows) { - struct zap_details details; + struct zap_details details = { }; pgoff_t hba = holebegin >> PAGE_SHIFT; pgoff_t hlen = (holelen + PAGE_SIZE - 1) >> PAGE_SHIFT; @@ -2602,7 +2623,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, inc_mm_counter_fast(mm, MM_ANONPAGES); dec_mm_counter_fast(mm, MM_SWAPENTS); pte = mk_pte(page, vma->vm_page_prot); - if ((flags & FAULT_FLAG_WRITE) && reuse_swap_page(page)) { + if ((flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) { pte = maybe_mkwrite(pte_mkdirty(pte), vma); flags &= ~FAULT_FLAG_WRITE; ret |= VM_FAULT_WRITE; @@ -2636,7 +2657,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, * parallel locked swapcache. */ unlock_page(swapcache); - page_cache_release(swapcache); + put_page(swapcache); } if (flags & FAULT_FLAG_WRITE) { @@ -2658,10 +2679,10 @@ out_nomap: out_page: unlock_page(page); out_release: - page_cache_release(page); + put_page(page); if (page != swapcache) { unlock_page(swapcache); - page_cache_release(swapcache); + put_page(swapcache); } return ret; } @@ -2769,7 +2790,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, if (userfaultfd_missing(vma)) { pte_unmap_unlock(page_table, ptl); mem_cgroup_cancel_charge(page, memcg, false); - page_cache_release(page); + put_page(page); return handle_userfault(vma, address, flags, VM_UFFD_MISSING); } @@ -2788,10 +2809,10 @@ unlock: return 0; release: mem_cgroup_cancel_charge(page, memcg, false); - page_cache_release(page); + put_page(page); goto unlock; oom_free_page: - page_cache_release(page); + put_page(page); oom: return VM_FAULT_OOM; } @@ -2824,7 +2845,7 @@ static int __do_fault(struct vm_area_struct *vma, unsigned long address, if (unlikely(PageHWPoison(vmf.page))) { if (ret & VM_FAULT_LOCKED) unlock_page(vmf.page); - page_cache_release(vmf.page); + put_page(vmf.page); return VM_FAULT_HWPOISON; } @@ -3013,7 +3034,7 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma, if (unlikely(!pte_same(*pte, orig_pte))) { pte_unmap_unlock(pte, ptl); unlock_page(fault_page); - page_cache_release(fault_page); + put_page(fault_page); return ret; } do_set_pte(vma, address, fault_page, pte, false, false); @@ -3041,7 +3062,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma, return VM_FAULT_OOM; if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg, false)) { - page_cache_release(new_page); + put_page(new_page); return VM_FAULT_OOM; } @@ -3058,7 +3079,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma, pte_unmap_unlock(pte, ptl); if (fault_page) { unlock_page(fault_page); - page_cache_release(fault_page); + put_page(fault_page); } else { /* * The fault handler has no page to lock, so it holds @@ -3074,7 +3095,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma, pte_unmap_unlock(pte, ptl); if (fault_page) { unlock_page(fault_page); - page_cache_release(fault_page); + put_page(fault_page); } else { /* * The fault handler has no page to lock, so it holds @@ -3085,7 +3106,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma, return ret; uncharge_out: mem_cgroup_cancel_charge(new_page, memcg, false); - page_cache_release(new_page); + put_page(new_page); return ret; } @@ -3113,7 +3134,7 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma, tmp = do_page_mkwrite(vma, fault_page, address); if (unlikely(!tmp || (tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) { - page_cache_release(fault_page); + put_page(fault_page); return tmp; } } @@ -3122,7 +3143,7 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma, if (unlikely(!pte_same(*pte, orig_pte))) { pte_unmap_unlock(pte, ptl); unlock_page(fault_page); - page_cache_release(fault_page); + put_page(fault_page); return ret; } do_set_pte(vma, address, fault_page, pte, true, false); @@ -3162,8 +3183,7 @@ static int do_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, pte_t *page_table, pmd_t *pmd, unsigned int flags, pte_t orig_pte) { - pgoff_t pgoff = (((address & PAGE_MASK) - - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; + pgoff_t pgoff = linear_page_index(vma, address); pte_unmap(page_table); /* The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */ @@ -3397,6 +3417,11 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, pmd_t *pmd; pte_t *pte; + if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE, + flags & FAULT_FLAG_INSTRUCTION, + flags & FAULT_FLAG_REMOTE)) + return VM_FAULT_SIGSEGV; + if (unlikely(is_vm_hugetlb_page(vma))) return hugetlb_fault(mm, vma, address, flags); @@ -3437,12 +3462,11 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, } /* - * Use __pte_alloc instead of pte_alloc_map, because we can't + * Use pte_alloc() instead of pte_alloc_map, because we can't * run pte_offset_map on the pmd, if an huge pmd could * materialize from under us from a different thread. */ - if (unlikely(pmd_none(*pmd)) && - unlikely(__pte_alloc(mm, vma, pmd, address))) + if (unlikely(pte_alloc(mm, pmd, address))) return VM_FAULT_OOM; /* * If a huge pmd materialized under us just retry later. Use @@ -3714,7 +3738,7 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, void *maddr; struct page *page = NULL; - ret = get_user_pages(tsk, mm, addr, 1, + ret = get_user_pages_remote(tsk, mm, addr, 1, write, 1, &page, &vma); if (ret <= 0) { #ifndef CONFIG_HAVE_IOREMAP_PROT @@ -3750,7 +3774,7 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, buf, maddr + offset, bytes); } kunmap(page); - page_cache_release(page); + put_page(page); } len -= bytes; buf += bytes; |