summary refs log tree commit diff
path: root/mm/mmap.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/mmap.c')
-rw-r--r-- mm/mmap.c 103
1 files changed, 90 insertions, 13 deletions
diff --git a/mm/mmap.c b/mm/mmap.c
index b7f391c8d..1f631e0b9 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -25,6 +25,7 @@
#include <linux/personality.h>
#include <linux/security.h>
#include <linux/hugetlb.h>
+#include <linux/shmem_fs.h>
#include <linux/profile.h>
#include <linux/export.h>
#include <linux/mount.h>
@@ -43,6 +44,7 @@
#include <linux/userfaultfd_k.h>
#include <linux/moduleparam.h>
#include <linux/pkeys.h>
+#include <linux/ksm.h>
#include <asm/uaccess.h>
#include <asm/cacheflush.h>
@@ -164,6 +166,7 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
if (vma->vm_file)
vma_fput(vma);
mpol_put(vma_policy(vma));
+ uksm_remove_vma(vma);
kmem_cache_free(vm_area_cachep, vma);
return next;
}
@@ -620,7 +623,6 @@ int vma_adjust(struct vm_area_struct *vma, unsigned long start,
{
struct mm_struct *mm = vma->vm_mm;
struct vm_area_struct *next = vma->vm_next;
- struct vm_area_struct *importer = NULL;
struct address_space *mapping = NULL;
struct rb_root *root = NULL;
struct anon_vma *anon_vma = NULL;
@@ -629,18 +631,33 @@ int vma_adjust(struct vm_area_struct *vma, unsigned long start,
long adjust_next = 0;
int remove_next = 0;
+/*
+ * To avoid deadlock, uksm_remove_vma() must be called before any spin_lock
+ * is acquired.
+ */
+ uksm_remove_vma(vma);
+
if (next && !insert) {
- struct vm_area_struct *exporter = NULL;
+ struct vm_area_struct *exporter = NULL, *importer = NULL;
+ uksm_remove_vma(next);
if (end >= next->vm_end) {
/*
* vma expands, overlapping all the next, and
* perhaps the one after too (mprotect case 6).
*/
-again: remove_next = 1 + (end > next->vm_end);
+ remove_next = 1 + (end > next->vm_end);
end = next->vm_end;
exporter = next;
importer = vma;
+
+ /*
+ * If next doesn't have anon_vma, import from vma after
+ * next, if the vma overlaps with it.
+ */
+ if (remove_next == 2 && next && !next->anon_vma)
+ exporter = next->vm_next;
+
} else if (end > next->vm_start) {
/*
* vma expands, overlapping part of the next:
@@ -674,6 +691,8 @@ again: remove_next = 1 + (end > next->vm_end);
return error;
}
}
+again:
+ vma_adjust_trans_huge(vma, start, end, adjust_next);
if (file) {
mapping = file->f_mapping;
@@ -695,8 +714,6 @@ again: remove_next = 1 + (end > next->vm_end);
}
}
- vma_adjust_trans_huge(vma, start, end, adjust_next);
-
anon_vma = vma->anon_vma;
if (!anon_vma && adjust_next)
anon_vma = next->anon_vma;
@@ -725,6 +742,7 @@ again: remove_next = 1 + (end > next->vm_end);
end_changed = true;
}
vma->vm_pgoff = pgoff;
+
if (adjust_next) {
next->vm_start += adjust_next << PAGE_SHIFT;
next->vm_pgoff += adjust_next;
@@ -795,16 +813,24 @@ again: remove_next = 1 + (end > next->vm_end);
* up the code too much to do both in one go.
*/
next = vma->vm_next;
- if (remove_next == 2)
+ if (remove_next == 2) {
+ remove_next = 1;
+ end = next->vm_end;
+ uksm_remove_vma(next);
goto again;
- else if (next)
+ } else if (next) {
vma_gap_update(next);
- else
+ } else {
mm->highest_vm_end = end;
+ }
+ } else {
+ if (next && !insert)
+ uksm_vma_add_new(next);
}
if (insert && file)
uprobe_mmap(insert);
+ uksm_vma_add_new(vma);
validate_mm(mm);
return 0;
@@ -1196,6 +1222,9 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
vm_flags |= calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) |
mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
+ /* If uksm is enabled, we add VM_MERGEABLE to new VMAs. */
+ uksm_vm_flags_mod(&vm_flags);
+
if (flags & MAP_LOCKED)
if (!can_do_mlock())
return -EPERM;
@@ -1534,6 +1563,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
allow_write_access(file);
}
file = vma->vm_file;
+ uksm_vma_add_new(vma);
out:
perf_event_mmap(vma);
@@ -1575,6 +1605,7 @@ allow_write_and_free_vma:
if (vm_flags & VM_DENYWRITE)
allow_write_access(file);
free_vma:
+ uksm_remove_vma(vma);
kmem_cache_free(vm_area_cachep, vma);
unacct_error:
if (charged)
@@ -1897,8 +1928,19 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
return -ENOMEM;
get_area = current->mm->get_unmapped_area;
- if (file && file->f_op->get_unmapped_area)
- get_area = file->f_op->get_unmapped_area;
+ if (file) {
+ if (file->f_op->get_unmapped_area)
+ get_area = file->f_op->get_unmapped_area;
+ } else if (flags & MAP_SHARED) {
+ /*
+ * mmap_region() will call shmem_zero_setup() to create a file,
+ * so use shmem's get_unmapped_area in case it can be huge.
+ * do_mmap_pgoff() will clear pgoff, so match alignment.
+ */
+ pgoff = 0;
+ get_area = shmem_get_unmapped_area;
+ }
+
addr = get_area(file, addr, len, pgoff, flags);
if (IS_ERR_VALUE(addr))
return addr;
@@ -2369,6 +2411,8 @@ static int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
else
err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
+ uksm_vma_add_new(new);
+
/* Success. */
if (!err)
return 0;
@@ -2591,6 +2635,12 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
/* drop PG_Mlocked flag for over-mapped range */
for (tmp = vma; tmp->vm_start >= start + size;
tmp = tmp->vm_next) {
+ /*
+ * Split pmd and munlock page on the border
+ * of the range.
+ */
+ vma_adjust_trans_huge(tmp, start, start + size, 0);
+
munlock_vma_pages_range(tmp,
max(tmp->vm_start, start),
min(tmp->vm_end, start + size));
@@ -2642,20 +2692,23 @@ static inline void verify_mm_writelocked(struct mm_struct *mm)
* anonymous maps. eventually we may be able to do some
* brk-specific accounting here.
*/
-static int do_brk(unsigned long addr, unsigned long len)
+static int do_brk(unsigned long addr, unsigned long request)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma, *prev;
- unsigned long flags;
+ unsigned long flags, len;
struct rb_node **rb_link, *rb_parent;
pgoff_t pgoff = addr >> PAGE_SHIFT;
int error;
- len = PAGE_ALIGN(len);
+ len = PAGE_ALIGN(request);
+ if (len < request)
+ return -ENOMEM;
if (!len)
return 0;
flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
+ uksm_vm_flags_mod(&flags);
error = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
if (offset_in_page(error))
@@ -2713,6 +2766,7 @@ static int do_brk(unsigned long addr, unsigned long len)
vma->vm_flags = flags;
vma->vm_page_prot = vm_get_page_prot(flags);
vma_link(mm, vma, prev, rb_link, rb_parent);
+ uksm_vma_add_new(vma);
out:
perf_event_mmap(vma);
mm->total_vm += len >> PAGE_SHIFT;
@@ -2751,6 +2805,12 @@ void exit_mmap(struct mm_struct *mm)
/* mm's last user has gone, and its about to be pulled down */
mmu_notifier_release(mm);
+ /*
+ * Taking write lock on mmap_sem does not harm others,
+ * but it's crucial for uksm to avoid races.
+ */
+ down_write(&mm->mmap_sem);
+
if (mm->locked_vm) {
vma = mm->mmap;
while (vma) {
@@ -2786,6 +2846,11 @@ void exit_mmap(struct mm_struct *mm)
vma = remove_vma(vma);
}
vm_unacct_memory(nr_accounted);
+
+ mm->mmap = NULL;
+ mm->mm_rb = RB_ROOT;
+ vmacache_invalidate(mm);
+ up_write(&mm->mmap_sem);
}
/* Insert vm structure into process list sorted by address
@@ -2895,6 +2960,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
new_vma->vm_ops->open(new_vma);
vma_link(mm, new_vma, prev, rb_link, rb_parent);
*need_rmap_locks = false;
+ uksm_vma_add_new(new_vma);
}
return new_vma;
@@ -2960,9 +3026,19 @@ static const char *special_mapping_name(struct vm_area_struct *vma)
return ((struct vm_special_mapping *)vma->vm_private_data)->name;
}
+static int special_mapping_mremap(struct vm_area_struct *new_vma)
+{
+ struct vm_special_mapping *sm = new_vma->vm_private_data;
+
+ if (sm->mremap)
+ return sm->mremap(sm, new_vma);
+ return 0;
+}
+
static const struct vm_operations_struct special_mapping_vmops = {
.close = special_mapping_close,
.fault = special_mapping_fault,
+ .mremap = special_mapping_mremap,
.name = special_mapping_name,
};
@@ -3032,6 +3108,7 @@ static struct vm_area_struct *__install_special_mapping(
vm_stat_account(mm, vma->vm_flags, len >> PAGE_SHIFT);
perf_event_mmap(vma);
+ uksm_vma_add_new(vma);
return vma;