author    | André Fabian Silva Delgado <emulatorman@parabola.nu> | 2016-03-25 03:53:42 -0300
committer | André Fabian Silva Delgado <emulatorman@parabola.nu> | 2016-03-25 03:53:42 -0300
commit    | 03dd4cb26d967f9588437b0fc9cc0e8353322bb7 (patch)
tree      | fa581f6dc1c0596391690d1f67eceef3af8246dc /mm/memory.c
parent    | d4e493caf788ef44982e131ff9c786546904d934 (diff)
Linux-libre 4.5-gnu
Diffstat (limited to 'mm/memory.c')
-rw-r--r-- | mm/memory.c | 166
1 file changed, 85 insertions, 81 deletions
```diff
diff --git a/mm/memory.c b/mm/memory.c
index 0efd48e94..09e967a3c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -50,6 +50,7 @@
 #include <linux/export.h>
 #include <linux/delayacct.h>
 #include <linux/init.h>
+#include <linux/pfn_t.h>
 #include <linux/writeback.h>
 #include <linux/memcontrol.h>
 #include <linux/mmu_notifier.h>
@@ -589,7 +590,6 @@ int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
 {
 	spinlock_t *ptl;
 	pgtable_t new = pte_alloc_one(mm, address);
-	int wait_split_huge_page;
 	if (!new)
 		return -ENOMEM;
@@ -609,18 +609,14 @@ int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
 	smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */
 
 	ptl = pmd_lock(mm, pmd);
-	wait_split_huge_page = 0;
 	if (likely(pmd_none(*pmd))) {	/* Has another populated it ? */
 		atomic_long_inc(&mm->nr_ptes);
 		pmd_populate(mm, pmd, new);
 		new = NULL;
-	} else if (unlikely(pmd_trans_splitting(*pmd)))
-		wait_split_huge_page = 1;
+	}
 	spin_unlock(ptl);
 	if (new)
 		pte_free(mm, new);
-	if (wait_split_huge_page)
-		wait_split_huge_page(vma->anon_vma, pmd);
 	return 0;
 }
@@ -636,8 +632,7 @@ int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
 	if (likely(pmd_none(*pmd))) {	/* Has another populated it ? */
 		pmd_populate_kernel(&init_mm, pmd, new);
 		new = NULL;
-	} else
-		VM_BUG_ON(pmd_trans_splitting(*pmd));
+	}
 	spin_unlock(&init_mm.page_table_lock);
 	if (new)
 		pte_free_kernel(&init_mm, new);
@@ -855,10 +850,7 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		} else if (is_migration_entry(entry)) {
 			page = migration_entry_to_page(entry);
 
-			if (PageAnon(page))
-				rss[MM_ANONPAGES]++;
-			else
-				rss[MM_FILEPAGES]++;
+			rss[mm_counter(page)]++;
 
 			if (is_write_migration_entry(entry) &&
 					is_cow_mapping(vm_flags)) {
@@ -896,11 +888,8 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	page = vm_normal_page(vma, addr, pte);
 	if (page) {
 		get_page(page);
-		page_dup_rmap(page);
-		if (PageAnon(page))
-			rss[MM_ANONPAGES]++;
-		else
-			rss[MM_FILEPAGES]++;
+		page_dup_rmap(page, false);
+		rss[mm_counter(page)]++;
 		/* Should return NULL in vm_normal_page() */
 		uksm_bugon_zeropage(pte);
 
@@ -989,7 +978,7 @@ static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src
 	src_pmd = pmd_offset(src_pud, addr);
 	do {
 		next = pmd_addr_end(addr, end);
-		if (pmd_trans_huge(*src_pmd)) {
+		if (pmd_trans_huge(*src_pmd) || pmd_devmap(*src_pmd)) {
 			int err;
 			VM_BUG_ON(next-addr != HPAGE_PMD_SIZE);
 			err = copy_huge_pmd(dst_mm, src_mm,
@@ -1139,13 +1128,11 @@ again:
 			ptent = ptep_get_and_clear_full(mm, addr, pte,
 							tlb->fullmm);
 			tlb_remove_tlb_entry(tlb, pte, addr);
-			if (unlikely(!page)) {
+			if (unlikely(!page))
 				uksm_unmap_zero_page(ptent);
 				continue;
-			}
-			if (PageAnon(page))
-				rss[MM_ANONPAGES]--;
-			else {
+
+			if (!PageAnon(page)) {
 				if (pte_dirty(ptent)) {
 					force_flush = 1;
 					set_page_dirty(page);
@@ -1153,9 +1140,9 @@ again:
 				if (pte_young(ptent) &&
 				    likely(!(vma->vm_flags & VM_SEQ_READ)))
 					mark_page_accessed(page);
-				rss[MM_FILEPAGES]--;
 			}
-			page_remove_rmap(page);
+			rss[mm_counter(page)]--;
+			page_remove_rmap(page, false);
 			if (unlikely(page_mapcount(page) < 0))
 				print_bad_pte(vma, addr, ptent, page);
 			if (unlikely(!__tlb_remove_page(tlb, page))) {
@@ -1176,11 +1163,7 @@ again:
 			struct page *page;
 
 			page = migration_entry_to_page(entry);
-
-			if (PageAnon(page))
-				rss[MM_ANONPAGES]--;
-			else
-				rss[MM_FILEPAGES]--;
+			rss[mm_counter(page)]--;
 		}
 		if (unlikely(!free_swap_and_cache(entry)))
 			print_bad_pte(vma, addr, ptent, NULL);
@@ -1223,7 +1206,7 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
 	pmd = pmd_offset(pud, addr);
 	do {
 		next = pmd_addr_end(addr, end);
-		if (pmd_trans_huge(*pmd)) {
+		if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) {
 			if (next - addr != HPAGE_PMD_SIZE) {
 #ifdef CONFIG_DEBUG_VM
 				if (!rwsem_is_locked(&tlb->mm->mmap_sem)) {
@@ -1234,7 +1217,7 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
 					BUG();
 				}
 #endif
-				split_huge_page_pmd(vma, addr, pmd);
+				split_huge_pmd(vma, pmd, addr);
 			} else if (zap_huge_pmd(tlb, vma, pmd, addr))
 				goto next;
 			/* fall through */
@@ -1490,7 +1473,7 @@ static int insert_page(struct vm_area_struct *vma, unsigned long addr,
 
 	/* Ok, finally just insert the thing.. */
 	get_page(page);
-	inc_mm_counter_fast(mm, MM_FILEPAGES);
+	inc_mm_counter_fast(mm, mm_counter_file(page));
 	page_add_file_rmap(page);
 	set_pte_at(mm, addr, pte, mk_pte(page, prot));
 
@@ -1547,7 +1530,7 @@ int vm_insert_page(struct vm_area_struct *vma, unsigned long addr,
 EXPORT_SYMBOL(vm_insert_page);
 
 static int insert_pfn(struct vm_area_struct *vma, unsigned long addr,
-			unsigned long pfn, pgprot_t prot)
+			pfn_t pfn, pgprot_t prot)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	int retval;
@@ -1563,7 +1546,10 @@ static int insert_pfn(struct vm_area_struct *vma, unsigned long addr,
 		goto out_unlock;
 
 	/* Ok, finally just insert the thing.. */
-	entry = pte_mkspecial(pfn_pte(pfn, prot));
+	if (pfn_t_devmap(pfn))
+		entry = pte_mkdevmap(pfn_t_pte(pfn, prot));
+	else
+		entry = pte_mkspecial(pfn_t_pte(pfn, prot));
 	set_pte_at(mm, addr, pte, entry);
 	update_mmu_cache(vma, addr, pte); /* XXX: why not for insert_page? */
 
@@ -1610,17 +1596,17 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
 
 	if (addr < vma->vm_start || addr >= vma->vm_end)
 		return -EFAULT;
-	if (track_pfn_insert(vma, &pgprot, pfn))
+	if (track_pfn_insert(vma, &pgprot, __pfn_to_pfn_t(pfn, PFN_DEV)))
 		return -EINVAL;
 
-	ret = insert_pfn(vma, addr, pfn, pgprot);
+	ret = insert_pfn(vma, addr, __pfn_to_pfn_t(pfn, PFN_DEV), pgprot);
 
 	return ret;
 }
 EXPORT_SYMBOL(vm_insert_pfn);
 
 int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
-			unsigned long pfn)
+			pfn_t pfn)
 {
 	BUG_ON(!(vma->vm_flags & VM_MIXEDMAP));
 
@@ -1634,10 +1620,15 @@ int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
 	 * than insert_pfn). If a zero_pfn were inserted into a VM_MIXEDMAP
 	 * without pte special, it would there be refcounted as a normal page.
 	 */
-	if (!HAVE_PTE_SPECIAL && pfn_valid(pfn)) {
+	if (!HAVE_PTE_SPECIAL && !pfn_t_devmap(pfn) && pfn_t_valid(pfn)) {
 		struct page *page;
 
-		page = pfn_to_page(pfn);
+		/*
+		 * At this point we are committed to insert_page()
+		 * regardless of whether the caller specified flags that
+		 * result in pfn_t_has_page() == false.
+		 */
+		page = pfn_to_page(pfn_t_to_pfn(pfn));
 		return insert_page(vma, addr, page, vma->vm_page_prot);
 	}
 	return insert_pfn(vma, addr, pfn, vma->vm_page_prot);
@@ -1981,6 +1972,20 @@ static inline void cow_user_page(struct page *dst, struct page *src, unsigned lo
 	}
 }
 
+static gfp_t __get_fault_gfp_mask(struct vm_area_struct *vma)
+{
+	struct file *vm_file = vma->vm_file;
+
+	if (vm_file)
+		return mapping_gfp_mask(vm_file->f_mapping) | __GFP_FS | __GFP_IO;
+
+	/*
+	 * Special mappings (e.g. VDSO) do not have any file so fake
+	 * a default GFP_KERNEL for them.
+	 */
+	return GFP_KERNEL;
+}
+
 /*
  * Notify the address space that the page is about to become writable so that
  * it can prohibit this or wait for the page to get into an appropriate state.
@@ -1996,6 +2001,7 @@ static int do_page_mkwrite(struct vm_area_struct *vma, struct page *page,
 	vmf.virtual_address = (void __user *)(address & PAGE_MASK);
 	vmf.pgoff = page->index;
 	vmf.flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE;
+	vmf.gfp_mask = __get_fault_gfp_mask(vma);
 	vmf.page = page;
 	vmf.cow_page = NULL;
 
@@ -2067,7 +2073,7 @@ static inline int wp_page_reuse(struct mm_struct *mm,
 		}
 
 		if (!page_mkwrite)
-			vma_file_update_time(vma);
+			file_update_time(vma->vm_file);
 	}
 
 	return VM_FAULT_WRITE;
@@ -2116,7 +2122,7 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma,
 		cow_user_page(new_page, old_page, address, vma);
 	}
 
-	if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg))
+	if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg, false))
 		goto oom_free_new;
 
 	__SetPageUptodate(new_page);
@@ -2130,7 +2136,8 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (likely(pte_same(*page_table, orig_pte))) {
 		if (old_page) {
 			if (!PageAnon(old_page)) {
-				dec_mm_counter_fast(mm, MM_FILEPAGES);
+				dec_mm_counter_fast(mm,
+						mm_counter_file(old_page));
 				inc_mm_counter_fast(mm, MM_ANONPAGES);
 			}
 			uksm_bugon_zeropage(orig_pte);
@@ -2148,8 +2155,8 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma,
 		 * thread doing COW.
 		 */
 		ptep_clear_flush_notify(vma, address, page_table);
-		page_add_new_anon_rmap(new_page, vma, address);
-		mem_cgroup_commit_charge(new_page, memcg, false);
+		page_add_new_anon_rmap(new_page, vma, address, false);
+		mem_cgroup_commit_charge(new_page, memcg, false, false);
 		lru_cache_add_active_or_unevictable(new_page, vma);
 		/*
 		 * We call the notify macro here because, when using secondary
@@ -2181,14 +2188,14 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma,
 			 * mapcount is visible. So transitively, TLBs to
 			 * old page will be flushed before it can be reused.
 			 */
-			page_remove_rmap(old_page);
+			page_remove_rmap(old_page, false);
 		}
 
 		/* Free the old page.. */
 		new_page = old_page;
 		page_copied = 1;
 	} else {
-		mem_cgroup_cancel_charge(new_page, memcg);
+		mem_cgroup_cancel_charge(new_page, memcg, false);
 	}
 
 	if (new_page)
@@ -2203,7 +2210,8 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma,
 		 */
 		if (page_copied && (vma->vm_flags & VM_LOCKED)) {
 			lock_page(old_page);	/* LRU manipulation */
-			munlock_vma_page(old_page);
+			if (PageMlocked(old_page))
+				munlock_vma_page(old_page);
 			unlock_page(old_page);
 		}
 		page_cache_release(old_page);
@@ -2263,11 +2271,6 @@ static int wp_page_shared(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	page_cache_get(old_page);
 
-	/*
-	 * Only catch write-faults on shared writable pages,
-	 * read-only shared pages can get COWed by
-	 * get_user_pages(.write=1, .force=1).
-	 */
 	if (vma->vm_ops && vma->vm_ops->page_mkwrite) {
 		int tmp;
 
@@ -2563,7 +2566,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		goto out_page;
 	}
 
-	if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg)) {
+	if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg, false)) {
 		ret = VM_FAULT_OOM;
 		goto out_page;
 	}
@@ -2597,7 +2600,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		pte = maybe_mkwrite(pte_mkdirty(pte), vma);
 		flags &= ~FAULT_FLAG_WRITE;
 		ret |= VM_FAULT_WRITE;
-		exclusive = 1;
+		exclusive = RMAP_EXCLUSIVE;
 	}
 	flush_icache_page(vma, page);
 	if (pte_swp_soft_dirty(orig_pte))
@@ -2605,15 +2608,16 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	set_pte_at(mm, address, page_table, pte);
 	if (page == swapcache) {
 		do_page_add_anon_rmap(page, vma, address, exclusive);
-		mem_cgroup_commit_charge(page, memcg, true);
+		mem_cgroup_commit_charge(page, memcg, true, false);
 	} else { /* ksm created a completely new copy */
-		page_add_new_anon_rmap(page, vma, address);
-		mem_cgroup_commit_charge(page, memcg, false);
+		page_add_new_anon_rmap(page, vma, address, false);
+		mem_cgroup_commit_charge(page, memcg, false, false);
 		lru_cache_add_active_or_unevictable(page, vma);
 	}
 
 	swap_free(entry);
-	if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
+	if (mem_cgroup_swap_full(page) ||
+	    (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
 		try_to_free_swap(page);
 	unlock_page(page);
 	if (page != swapcache) {
@@ -2643,7 +2647,7 @@ unlock:
 out:
 	return ret;
 out_nomap:
-	mem_cgroup_cancel_charge(page, memcg);
+	mem_cgroup_cancel_charge(page, memcg, false);
 	pte_unmap_unlock(page_table, ptl);
 out_page:
 	unlock_page(page);
@@ -2737,7 +2741,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (!page)
 		goto oom;
 
-	if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg))
+	if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg, false))
 		goto oom_free_page;
 
 	/*
@@ -2758,15 +2762,15 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	/* Deliver the page fault to userland, check inside PT lock */
 	if (userfaultfd_missing(vma)) {
 		pte_unmap_unlock(page_table, ptl);
-		mem_cgroup_cancel_charge(page, memcg);
+		mem_cgroup_cancel_charge(page, memcg, false);
 		page_cache_release(page);
 		return handle_userfault(vma, address, flags,
 					VM_UFFD_MISSING);
 	}
 
 	inc_mm_counter_fast(mm, MM_ANONPAGES);
-	page_add_new_anon_rmap(page, vma, address);
-	mem_cgroup_commit_charge(page, memcg, false);
+	page_add_new_anon_rmap(page, vma, address, false);
+	mem_cgroup_commit_charge(page, memcg, false, false);
 	lru_cache_add_active_or_unevictable(page, vma);
 setpte:
 	set_pte_at(mm, address, page_table, entry);
@@ -2777,7 +2781,7 @@ unlock:
 	pte_unmap_unlock(page_table, ptl);
 	return 0;
 release:
-	mem_cgroup_cancel_charge(page, memcg);
+	mem_cgroup_cancel_charge(page, memcg, false);
 	page_cache_release(page);
 	goto unlock;
 oom_free_page:
@@ -2802,6 +2806,7 @@ static int __do_fault(struct vm_area_struct *vma, unsigned long address,
 	vmf.pgoff = pgoff;
 	vmf.flags = flags;
 	vmf.page = NULL;
+	vmf.gfp_mask = __get_fault_gfp_mask(vma);
 	vmf.cow_page = cow_page;
 
 	ret = vma->vm_ops->fault(vma, &vmf);
@@ -2853,9 +2858,9 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address,
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 	if (anon) {
 		inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
-		page_add_new_anon_rmap(page, vma, address);
+		page_add_new_anon_rmap(page, vma, address, false);
 	} else {
-		inc_mm_counter_fast(vma->vm_mm, MM_FILEPAGES);
+		inc_mm_counter_fast(vma->vm_mm, mm_counter_file(page));
 		page_add_file_rmap(page);
 	}
 	set_pte_at(vma->vm_mm, address, pte, entry);
@@ -2968,6 +2973,7 @@ static void do_fault_around(struct vm_area_struct *vma, unsigned long address,
 	vmf.pgoff = pgoff;
 	vmf.max_pgoff = max_pgoff;
 	vmf.flags = flags;
+	vmf.gfp_mask = __get_fault_gfp_mask(vma);
 	vma->vm_ops->map_pages(vma, &vmf);
 }
 
@@ -3028,7 +3034,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (!new_page)
 		return VM_FAULT_OOM;
 
-	if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg)) {
+	if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg, false)) {
 		page_cache_release(new_page);
 		return VM_FAULT_OOM;
 	}
@@ -3057,7 +3063,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		goto uncharge_out;
 	}
 	do_set_pte(vma, address, new_page, pte, true, true);
-	mem_cgroup_commit_charge(new_page, memcg, false);
+	mem_cgroup_commit_charge(new_page, memcg, false, false);
 	lru_cache_add_active_or_unevictable(new_page, vma);
 	pte_unmap_unlock(pte, ptl);
 	if (fault_page) {
@@ -3072,7 +3078,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	}
 	return ret;
 uncharge_out:
-	mem_cgroup_cancel_charge(new_page, memcg);
+	mem_cgroup_cancel_charge(new_page, memcg, false);
 	page_cache_release(new_page);
 	return ret;
 }
@@ -3124,7 +3130,7 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * pinned by vma->vm_file's reference. We rely on unlock_page()'s
 	 * release semantics to prevent the compiler from undoing this copying.
 	 */
-	mapping = fault_page->mapping;
+	mapping = page_rmapping(fault_page);
 	unlock_page(fault_page);
 	if ((dirtied || vma->vm_ops->page_mkwrite) && mapping) {
 		/*
@@ -3226,6 +3232,12 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		return 0;
 	}
 
+	/* TODO: handle PTE-mapped THP */
+	if (PageCompound(page)) {
+		pte_unmap_unlock(ptep, ptl);
+		return 0;
+	}
+
 	/*
 	 * Avoid grouping on RO pages in general. RO pages shouldn't hurt as
 	 * much anyway since they can be in shared cache state. This misses
@@ -3398,17 +3410,9 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		int ret;
 
 		barrier();
-		if (pmd_trans_huge(orig_pmd)) {
+		if (pmd_trans_huge(orig_pmd) || pmd_devmap(orig_pmd)) {
 			unsigned int dirty = flags & FAULT_FLAG_WRITE;
 
-			/*
-			 * If the pmd is splitting, return and retry the
-			 * the fault. Alternative: wait until the split
-			 * is done, and goto retry.
-			 */
-			if (pmd_trans_splitting(orig_pmd))
-				return 0;
-
 			if (pmd_protnone(orig_pmd))
 				return do_huge_pmd_numa_page(mm, vma, address,
 							     orig_pmd, pmd);
@@ -3445,7 +3449,7 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * through an atomic read in C, which is what pmd_trans_unstable()
 	 * provides.
 	 */
-	if (unlikely(pmd_trans_unstable(pmd)))
+	if (unlikely(pmd_trans_unstable(pmd) || pmd_devmap(*pmd)))
 		return 0;
 	/*
 	 * A regular pmd is established and it can't morph into a huge pmd
```
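Most of the accounting hunks above replace the open-coded PageAnon() checks on the per-mm RSS counters with the mm_counter() / mm_counter_file() helpers, which in the 4.5 series also give shmem pages their own MM_SHMEMPAGES bucket instead of lumping them in with MM_FILEPAGES. For orientation, here is a minimal sketch of what those helpers resolve to; it is reproduced from memory of include/linux/mm.h around v4.5 rather than taken from this patch, so treat the exact definitions as an approximation:

```c
/* Sketch of the counter-selection helpers this patch starts using. */

/* File-backed pages: shmem/tmpfs (swap-backed) vs. regular page cache. */
static inline int mm_counter_file(struct page *page)
{
	if (PageSwapBacked(page))
		return MM_SHMEMPAGES;
	return MM_FILEPAGES;
}

/* Anonymous pages get their own counter; everything else is file-backed. */
static inline int mm_counter(struct page *page)
{
	if (PageAnon(page))
		return MM_ANONPAGES;
	return mm_counter_file(page);
}
```

With these helpers, the repeated pattern in copy_one_pte() and zap_pte_range(), "if (PageAnon(page)) rss[MM_ANONPAGES]++; else rss[MM_FILEPAGES]++;", collapses to the single "rss[mm_counter(page)]++;" seen in the hunks above, and the MM_FILEPAGES call sites in insert_page(), wp_page_copy() and do_set_pte() likewise become mm_counter_file(page).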