summaryrefslogtreecommitdiff
path: root/mm/memory.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/memory.c')
-rw-r--r--mm/memory.c166
1 files changed, 85 insertions, 81 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 0efd48e94..09e967a3c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -50,6 +50,7 @@
#include <linux/export.h>
#include <linux/delayacct.h>
#include <linux/init.h>
+#include <linux/pfn_t.h>
#include <linux/writeback.h>
#include <linux/memcontrol.h>
#include <linux/mmu_notifier.h>
@@ -589,7 +590,6 @@ int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
{
spinlock_t *ptl;
pgtable_t new = pte_alloc_one(mm, address);
- int wait_split_huge_page;
if (!new)
return -ENOMEM;
@@ -609,18 +609,14 @@ int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */
ptl = pmd_lock(mm, pmd);
- wait_split_huge_page = 0;
if (likely(pmd_none(*pmd))) { /* Has another populated it ? */
atomic_long_inc(&mm->nr_ptes);
pmd_populate(mm, pmd, new);
new = NULL;
- } else if (unlikely(pmd_trans_splitting(*pmd)))
- wait_split_huge_page = 1;
+ }
spin_unlock(ptl);
if (new)
pte_free(mm, new);
- if (wait_split_huge_page)
- wait_split_huge_page(vma->anon_vma, pmd);
return 0;
}
@@ -636,8 +632,7 @@ int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
if (likely(pmd_none(*pmd))) { /* Has another populated it ? */
pmd_populate_kernel(&init_mm, pmd, new);
new = NULL;
- } else
- VM_BUG_ON(pmd_trans_splitting(*pmd));
+ }
spin_unlock(&init_mm.page_table_lock);
if (new)
pte_free_kernel(&init_mm, new);
@@ -855,10 +850,7 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
} else if (is_migration_entry(entry)) {
page = migration_entry_to_page(entry);
- if (PageAnon(page))
- rss[MM_ANONPAGES]++;
- else
- rss[MM_FILEPAGES]++;
+ rss[mm_counter(page)]++;
if (is_write_migration_entry(entry) &&
is_cow_mapping(vm_flags)) {
@@ -896,11 +888,8 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
page = vm_normal_page(vma, addr, pte);
if (page) {
get_page(page);
- page_dup_rmap(page);
- if (PageAnon(page))
- rss[MM_ANONPAGES]++;
- else
- rss[MM_FILEPAGES]++;
+ page_dup_rmap(page, false);
+ rss[mm_counter(page)]++;
/* Should return NULL in vm_normal_page() */
uksm_bugon_zeropage(pte);
@@ -989,7 +978,7 @@ static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src
src_pmd = pmd_offset(src_pud, addr);
do {
next = pmd_addr_end(addr, end);
- if (pmd_trans_huge(*src_pmd)) {
+ if (pmd_trans_huge(*src_pmd) || pmd_devmap(*src_pmd)) {
int err;
VM_BUG_ON(next-addr != HPAGE_PMD_SIZE);
err = copy_huge_pmd(dst_mm, src_mm,
@@ -1139,13 +1128,11 @@ again:
ptent = ptep_get_and_clear_full(mm, addr, pte,
tlb->fullmm);
tlb_remove_tlb_entry(tlb, pte, addr);
- if (unlikely(!page)) {
+ if (unlikely(!page))
uksm_unmap_zero_page(ptent);
continue;
- }
- if (PageAnon(page))
- rss[MM_ANONPAGES]--;
- else {
+
+ if (!PageAnon(page)) {
if (pte_dirty(ptent)) {
force_flush = 1;
set_page_dirty(page);
@@ -1153,9 +1140,9 @@ again:
if (pte_young(ptent) &&
likely(!(vma->vm_flags & VM_SEQ_READ)))
mark_page_accessed(page);
- rss[MM_FILEPAGES]--;
}
- page_remove_rmap(page);
+ rss[mm_counter(page)]--;
+ page_remove_rmap(page, false);
if (unlikely(page_mapcount(page) < 0))
print_bad_pte(vma, addr, ptent, page);
if (unlikely(!__tlb_remove_page(tlb, page))) {
@@ -1176,11 +1163,7 @@ again:
struct page *page;
page = migration_entry_to_page(entry);
-
- if (PageAnon(page))
- rss[MM_ANONPAGES]--;
- else
- rss[MM_FILEPAGES]--;
+ rss[mm_counter(page)]--;
}
if (unlikely(!free_swap_and_cache(entry)))
print_bad_pte(vma, addr, ptent, NULL);
@@ -1223,7 +1206,7 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
pmd = pmd_offset(pud, addr);
do {
next = pmd_addr_end(addr, end);
- if (pmd_trans_huge(*pmd)) {
+ if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) {
if (next - addr != HPAGE_PMD_SIZE) {
#ifdef CONFIG_DEBUG_VM
if (!rwsem_is_locked(&tlb->mm->mmap_sem)) {
@@ -1234,7 +1217,7 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
BUG();
}
#endif
- split_huge_page_pmd(vma, addr, pmd);
+ split_huge_pmd(vma, pmd, addr);
} else if (zap_huge_pmd(tlb, vma, pmd, addr))
goto next;
/* fall through */
@@ -1490,7 +1473,7 @@ static int insert_page(struct vm_area_struct *vma, unsigned long addr,
/* Ok, finally just insert the thing.. */
get_page(page);
- inc_mm_counter_fast(mm, MM_FILEPAGES);
+ inc_mm_counter_fast(mm, mm_counter_file(page));
page_add_file_rmap(page);
set_pte_at(mm, addr, pte, mk_pte(page, prot));
@@ -1547,7 +1530,7 @@ int vm_insert_page(struct vm_area_struct *vma, unsigned long addr,
EXPORT_SYMBOL(vm_insert_page);
static int insert_pfn(struct vm_area_struct *vma, unsigned long addr,
- unsigned long pfn, pgprot_t prot)
+ pfn_t pfn, pgprot_t prot)
{
struct mm_struct *mm = vma->vm_mm;
int retval;
@@ -1563,7 +1546,10 @@ static int insert_pfn(struct vm_area_struct *vma, unsigned long addr,
goto out_unlock;
/* Ok, finally just insert the thing.. */
- entry = pte_mkspecial(pfn_pte(pfn, prot));
+ if (pfn_t_devmap(pfn))
+ entry = pte_mkdevmap(pfn_t_pte(pfn, prot));
+ else
+ entry = pte_mkspecial(pfn_t_pte(pfn, prot));
set_pte_at(mm, addr, pte, entry);
update_mmu_cache(vma, addr, pte); /* XXX: why not for insert_page? */
@@ -1610,17 +1596,17 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
if (addr < vma->vm_start || addr >= vma->vm_end)
return -EFAULT;
- if (track_pfn_insert(vma, &pgprot, pfn))
+ if (track_pfn_insert(vma, &pgprot, __pfn_to_pfn_t(pfn, PFN_DEV)))
return -EINVAL;
- ret = insert_pfn(vma, addr, pfn, pgprot);
+ ret = insert_pfn(vma, addr, __pfn_to_pfn_t(pfn, PFN_DEV), pgprot);
return ret;
}
EXPORT_SYMBOL(vm_insert_pfn);
int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
- unsigned long pfn)
+ pfn_t pfn)
{
BUG_ON(!(vma->vm_flags & VM_MIXEDMAP));
@@ -1634,10 +1620,15 @@ int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
* than insert_pfn). If a zero_pfn were inserted into a VM_MIXEDMAP
* without pte special, it would there be refcounted as a normal page.
*/
- if (!HAVE_PTE_SPECIAL && pfn_valid(pfn)) {
+ if (!HAVE_PTE_SPECIAL && !pfn_t_devmap(pfn) && pfn_t_valid(pfn)) {
struct page *page;
- page = pfn_to_page(pfn);
+ /*
+ * At this point we are committed to insert_page()
+ * regardless of whether the caller specified flags that
+ * result in pfn_t_has_page() == false.
+ */
+ page = pfn_to_page(pfn_t_to_pfn(pfn));
return insert_page(vma, addr, page, vma->vm_page_prot);
}
return insert_pfn(vma, addr, pfn, vma->vm_page_prot);
@@ -1981,6 +1972,20 @@ static inline void cow_user_page(struct page *dst, struct page *src, unsigned lo
}
}
+static gfp_t __get_fault_gfp_mask(struct vm_area_struct *vma)
+{
+ struct file *vm_file = vma->vm_file;
+
+ if (vm_file)
+ return mapping_gfp_mask(vm_file->f_mapping) | __GFP_FS | __GFP_IO;
+
+ /*
+ * Special mappings (e.g. VDSO) do not have any file so fake
+ * a default GFP_KERNEL for them.
+ */
+ return GFP_KERNEL;
+}
+
/*
* Notify the address space that the page is about to become writable so that
* it can prohibit this or wait for the page to get into an appropriate state.
@@ -1996,6 +2001,7 @@ static int do_page_mkwrite(struct vm_area_struct *vma, struct page *page,
vmf.virtual_address = (void __user *)(address & PAGE_MASK);
vmf.pgoff = page->index;
vmf.flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE;
+ vmf.gfp_mask = __get_fault_gfp_mask(vma);
vmf.page = page;
vmf.cow_page = NULL;
@@ -2067,7 +2073,7 @@ static inline int wp_page_reuse(struct mm_struct *mm,
}
if (!page_mkwrite)
- vma_file_update_time(vma);
+ file_update_time(vma->vm_file);
}
return VM_FAULT_WRITE;
@@ -2116,7 +2122,7 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma,
cow_user_page(new_page, old_page, address, vma);
}
- if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg))
+ if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg, false))
goto oom_free_new;
__SetPageUptodate(new_page);
@@ -2130,7 +2136,8 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma,
if (likely(pte_same(*page_table, orig_pte))) {
if (old_page) {
if (!PageAnon(old_page)) {
- dec_mm_counter_fast(mm, MM_FILEPAGES);
+ dec_mm_counter_fast(mm,
+ mm_counter_file(old_page));
inc_mm_counter_fast(mm, MM_ANONPAGES);
}
uksm_bugon_zeropage(orig_pte);
@@ -2148,8 +2155,8 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma,
* thread doing COW.
*/
ptep_clear_flush_notify(vma, address, page_table);
- page_add_new_anon_rmap(new_page, vma, address);
- mem_cgroup_commit_charge(new_page, memcg, false);
+ page_add_new_anon_rmap(new_page, vma, address, false);
+ mem_cgroup_commit_charge(new_page, memcg, false, false);
lru_cache_add_active_or_unevictable(new_page, vma);
/*
* We call the notify macro here because, when using secondary
@@ -2181,14 +2188,14 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma,
* mapcount is visible. So transitively, TLBs to
* old page will be flushed before it can be reused.
*/
- page_remove_rmap(old_page);
+ page_remove_rmap(old_page, false);
}
/* Free the old page.. */
new_page = old_page;
page_copied = 1;
} else {
- mem_cgroup_cancel_charge(new_page, memcg);
+ mem_cgroup_cancel_charge(new_page, memcg, false);
}
if (new_page)
@@ -2203,7 +2210,8 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma,
*/
if (page_copied && (vma->vm_flags & VM_LOCKED)) {
lock_page(old_page); /* LRU manipulation */
- munlock_vma_page(old_page);
+ if (PageMlocked(old_page))
+ munlock_vma_page(old_page);
unlock_page(old_page);
}
page_cache_release(old_page);
@@ -2263,11 +2271,6 @@ static int wp_page_shared(struct mm_struct *mm, struct vm_area_struct *vma,
page_cache_get(old_page);
- /*
- * Only catch write-faults on shared writable pages,
- * read-only shared pages can get COWed by
- * get_user_pages(.write=1, .force=1).
- */
if (vma->vm_ops && vma->vm_ops->page_mkwrite) {
int tmp;
@@ -2563,7 +2566,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
goto out_page;
}
- if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg)) {
+ if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg, false)) {
ret = VM_FAULT_OOM;
goto out_page;
}
@@ -2597,7 +2600,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
pte = maybe_mkwrite(pte_mkdirty(pte), vma);
flags &= ~FAULT_FLAG_WRITE;
ret |= VM_FAULT_WRITE;
- exclusive = 1;
+ exclusive = RMAP_EXCLUSIVE;
}
flush_icache_page(vma, page);
if (pte_swp_soft_dirty(orig_pte))
@@ -2605,15 +2608,16 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
set_pte_at(mm, address, page_table, pte);
if (page == swapcache) {
do_page_add_anon_rmap(page, vma, address, exclusive);
- mem_cgroup_commit_charge(page, memcg, true);
+ mem_cgroup_commit_charge(page, memcg, true, false);
} else { /* ksm created a completely new copy */
- page_add_new_anon_rmap(page, vma, address);
- mem_cgroup_commit_charge(page, memcg, false);
+ page_add_new_anon_rmap(page, vma, address, false);
+ mem_cgroup_commit_charge(page, memcg, false, false);
lru_cache_add_active_or_unevictable(page, vma);
}
swap_free(entry);
- if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
+ if (mem_cgroup_swap_full(page) ||
+ (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
try_to_free_swap(page);
unlock_page(page);
if (page != swapcache) {
@@ -2643,7 +2647,7 @@ unlock:
out:
return ret;
out_nomap:
- mem_cgroup_cancel_charge(page, memcg);
+ mem_cgroup_cancel_charge(page, memcg, false);
pte_unmap_unlock(page_table, ptl);
out_page:
unlock_page(page);
@@ -2737,7 +2741,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
if (!page)
goto oom;
- if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg))
+ if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg, false))
goto oom_free_page;
/*
@@ -2758,15 +2762,15 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
/* Deliver the page fault to userland, check inside PT lock */
if (userfaultfd_missing(vma)) {
pte_unmap_unlock(page_table, ptl);
- mem_cgroup_cancel_charge(page, memcg);
+ mem_cgroup_cancel_charge(page, memcg, false);
page_cache_release(page);
return handle_userfault(vma, address, flags,
VM_UFFD_MISSING);
}
inc_mm_counter_fast(mm, MM_ANONPAGES);
- page_add_new_anon_rmap(page, vma, address);
- mem_cgroup_commit_charge(page, memcg, false);
+ page_add_new_anon_rmap(page, vma, address, false);
+ mem_cgroup_commit_charge(page, memcg, false, false);
lru_cache_add_active_or_unevictable(page, vma);
setpte:
set_pte_at(mm, address, page_table, entry);
@@ -2777,7 +2781,7 @@ unlock:
pte_unmap_unlock(page_table, ptl);
return 0;
release:
- mem_cgroup_cancel_charge(page, memcg);
+ mem_cgroup_cancel_charge(page, memcg, false);
page_cache_release(page);
goto unlock;
oom_free_page:
@@ -2802,6 +2806,7 @@ static int __do_fault(struct vm_area_struct *vma, unsigned long address,
vmf.pgoff = pgoff;
vmf.flags = flags;
vmf.page = NULL;
+ vmf.gfp_mask = __get_fault_gfp_mask(vma);
vmf.cow_page = cow_page;
ret = vma->vm_ops->fault(vma, &vmf);
@@ -2853,9 +2858,9 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address,
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
if (anon) {
inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
- page_add_new_anon_rmap(page, vma, address);
+ page_add_new_anon_rmap(page, vma, address, false);
} else {
- inc_mm_counter_fast(vma->vm_mm, MM_FILEPAGES);
+ inc_mm_counter_fast(vma->vm_mm, mm_counter_file(page));
page_add_file_rmap(page);
}
set_pte_at(vma->vm_mm, address, pte, entry);
@@ -2968,6 +2973,7 @@ static void do_fault_around(struct vm_area_struct *vma, unsigned long address,
vmf.pgoff = pgoff;
vmf.max_pgoff = max_pgoff;
vmf.flags = flags;
+ vmf.gfp_mask = __get_fault_gfp_mask(vma);
vma->vm_ops->map_pages(vma, &vmf);
}
@@ -3028,7 +3034,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
if (!new_page)
return VM_FAULT_OOM;
- if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg)) {
+ if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg, false)) {
page_cache_release(new_page);
return VM_FAULT_OOM;
}
@@ -3057,7 +3063,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
goto uncharge_out;
}
do_set_pte(vma, address, new_page, pte, true, true);
- mem_cgroup_commit_charge(new_page, memcg, false);
+ mem_cgroup_commit_charge(new_page, memcg, false, false);
lru_cache_add_active_or_unevictable(new_page, vma);
pte_unmap_unlock(pte, ptl);
if (fault_page) {
@@ -3072,7 +3078,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
}
return ret;
uncharge_out:
- mem_cgroup_cancel_charge(new_page, memcg);
+ mem_cgroup_cancel_charge(new_page, memcg, false);
page_cache_release(new_page);
return ret;
}
@@ -3124,7 +3130,7 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
* pinned by vma->vm_file's reference. We rely on unlock_page()'s
* release semantics to prevent the compiler from undoing this copying.
*/
- mapping = fault_page->mapping;
+ mapping = page_rmapping(fault_page);
unlock_page(fault_page);
if ((dirtied || vma->vm_ops->page_mkwrite) && mapping) {
/*
@@ -3226,6 +3232,12 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
return 0;
}
+ /* TODO: handle PTE-mapped THP */
+ if (PageCompound(page)) {
+ pte_unmap_unlock(ptep, ptl);
+ return 0;
+ }
+
/*
* Avoid grouping on RO pages in general. RO pages shouldn't hurt as
* much anyway since they can be in shared cache state. This misses
@@ -3398,17 +3410,9 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
int ret;
barrier();
- if (pmd_trans_huge(orig_pmd)) {
+ if (pmd_trans_huge(orig_pmd) || pmd_devmap(orig_pmd)) {
unsigned int dirty = flags & FAULT_FLAG_WRITE;
- /*
- * If the pmd is splitting, return and retry the
- * the fault. Alternative: wait until the split
- * is done, and goto retry.
- */
- if (pmd_trans_splitting(orig_pmd))
- return 0;
-
if (pmd_protnone(orig_pmd))
return do_huge_pmd_numa_page(mm, vma, address,
orig_pmd, pmd);
@@ -3445,7 +3449,7 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
* through an atomic read in C, which is what pmd_trans_unstable()
* provides.
*/
- if (unlikely(pmd_trans_unstable(pmd)))
+ if (unlikely(pmd_trans_unstable(pmd) || pmd_devmap(*pmd)))
return 0;
/*
* A regular pmd is established and it can't morph into a huge pmd