diff options
Diffstat (limited to 'mm/mmap.c')
-rw-r--r-- | mm/mmap.c | 103 |
1 files changed, 90 insertions, 13 deletions
@@ -25,6 +25,7 @@ #include <linux/personality.h> #include <linux/security.h> #include <linux/hugetlb.h> +#include <linux/shmem_fs.h> #include <linux/profile.h> #include <linux/export.h> #include <linux/mount.h> @@ -43,6 +44,7 @@ #include <linux/userfaultfd_k.h> #include <linux/moduleparam.h> #include <linux/pkeys.h> +#include <linux/ksm.h> #include <asm/uaccess.h> #include <asm/cacheflush.h> @@ -164,6 +166,7 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma) if (vma->vm_file) vma_fput(vma); mpol_put(vma_policy(vma)); + uksm_remove_vma(vma); kmem_cache_free(vm_area_cachep, vma); return next; } @@ -620,7 +623,6 @@ int vma_adjust(struct vm_area_struct *vma, unsigned long start, { struct mm_struct *mm = vma->vm_mm; struct vm_area_struct *next = vma->vm_next; - struct vm_area_struct *importer = NULL; struct address_space *mapping = NULL; struct rb_root *root = NULL; struct anon_vma *anon_vma = NULL; @@ -629,18 +631,33 @@ int vma_adjust(struct vm_area_struct *vma, unsigned long start, long adjust_next = 0; int remove_next = 0; +/* + * to avoid deadlock, ksm_remove_vma must be done before any spin_lock is + * acquired + */ + uksm_remove_vma(vma); + if (next && !insert) { - struct vm_area_struct *exporter = NULL; + struct vm_area_struct *exporter = NULL, *importer = NULL; + uksm_remove_vma(next); if (end >= next->vm_end) { /* * vma expands, overlapping all the next, and * perhaps the one after too (mprotect case 6). */ -again: remove_next = 1 + (end > next->vm_end); + remove_next = 1 + (end > next->vm_end); end = next->vm_end; exporter = next; importer = vma; + + /* + * If next doesn't have anon_vma, import from vma after + * next, if the vma overlaps with it. + */ + if (remove_next == 2 && next && !next->anon_vma) + exporter = next->vm_next; + } else if (end > next->vm_start) { /* * vma expands, overlapping part of the next: @@ -674,6 +691,8 @@ again: remove_next = 1 + (end > next->vm_end); return error; } } +again: + vma_adjust_trans_huge(vma, start, end, adjust_next); if (file) { mapping = file->f_mapping; @@ -695,8 +714,6 @@ again: remove_next = 1 + (end > next->vm_end); } } - vma_adjust_trans_huge(vma, start, end, adjust_next); - anon_vma = vma->anon_vma; if (!anon_vma && adjust_next) anon_vma = next->anon_vma; @@ -725,6 +742,7 @@ again: remove_next = 1 + (end > next->vm_end); end_changed = true; } vma->vm_pgoff = pgoff; + if (adjust_next) { next->vm_start += adjust_next << PAGE_SHIFT; next->vm_pgoff += adjust_next; @@ -795,16 +813,24 @@ again: remove_next = 1 + (end > next->vm_end); * up the code too much to do both in one go. */ next = vma->vm_next; - if (remove_next == 2) + if (remove_next == 2) { + remove_next = 1; + end = next->vm_end; + uksm_remove_vma(next); goto again; - else if (next) + } else if (next) { vma_gap_update(next); - else + } else { mm->highest_vm_end = end; + } + } else { + if (next && !insert) + uksm_vma_add_new(next); } if (insert && file) uprobe_mmap(insert); + uksm_vma_add_new(vma); validate_mm(mm); return 0; @@ -1196,6 +1222,9 @@ unsigned long do_mmap(struct file *file, unsigned long addr, vm_flags |= calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) | mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; + /* If uksm is enabled, we add VM_MERGABLE to new VMAs. */ + uksm_vm_flags_mod(&vm_flags); + if (flags & MAP_LOCKED) if (!can_do_mlock()) return -EPERM; @@ -1534,6 +1563,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr, allow_write_access(file); } file = vma->vm_file; + uksm_vma_add_new(vma); out: perf_event_mmap(vma); @@ -1575,6 +1605,7 @@ allow_write_and_free_vma: if (vm_flags & VM_DENYWRITE) allow_write_access(file); free_vma: + uksm_remove_vma(vma); kmem_cache_free(vm_area_cachep, vma); unacct_error: if (charged) @@ -1897,8 +1928,19 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, return -ENOMEM; get_area = current->mm->get_unmapped_area; - if (file && file->f_op->get_unmapped_area) - get_area = file->f_op->get_unmapped_area; + if (file) { + if (file->f_op->get_unmapped_area) + get_area = file->f_op->get_unmapped_area; + } else if (flags & MAP_SHARED) { + /* + * mmap_region() will call shmem_zero_setup() to create a file, + * so use shmem's get_unmapped_area in case it can be huge. + * do_mmap_pgoff() will clear pgoff, so match alignment. + */ + pgoff = 0; + get_area = shmem_get_unmapped_area; + } + addr = get_area(file, addr, len, pgoff, flags); if (IS_ERR_VALUE(addr)) return addr; @@ -2369,6 +2411,8 @@ static int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma, else err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new); + uksm_vma_add_new(new); + /* Success. */ if (!err) return 0; @@ -2591,6 +2635,12 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, /* drop PG_Mlocked flag for over-mapped range */ for (tmp = vma; tmp->vm_start >= start + size; tmp = tmp->vm_next) { + /* + * Split pmd and munlock page on the border + * of the range. + */ + vma_adjust_trans_huge(tmp, start, start + size, 0); + munlock_vma_pages_range(tmp, max(tmp->vm_start, start), min(tmp->vm_end, start + size)); @@ -2642,20 +2692,23 @@ static inline void verify_mm_writelocked(struct mm_struct *mm) * anonymous maps. eventually we may be able to do some * brk-specific accounting here. */ -static int do_brk(unsigned long addr, unsigned long len) +static int do_brk(unsigned long addr, unsigned long request) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma, *prev; - unsigned long flags; + unsigned long flags, len; struct rb_node **rb_link, *rb_parent; pgoff_t pgoff = addr >> PAGE_SHIFT; int error; - len = PAGE_ALIGN(len); + len = PAGE_ALIGN(request); + if (len < request) + return -ENOMEM; if (!len) return 0; flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags; + uksm_vm_flags_mod(&flags); error = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED); if (offset_in_page(error)) @@ -2713,6 +2766,7 @@ static int do_brk(unsigned long addr, unsigned long len) vma->vm_flags = flags; vma->vm_page_prot = vm_get_page_prot(flags); vma_link(mm, vma, prev, rb_link, rb_parent); + uksm_vma_add_new(vma); out: perf_event_mmap(vma); mm->total_vm += len >> PAGE_SHIFT; @@ -2751,6 +2805,12 @@ void exit_mmap(struct mm_struct *mm) /* mm's last user has gone, and its about to be pulled down */ mmu_notifier_release(mm); + /* + * Taking write lock on mmap_sem does not harm others, + * but it's crucial for uksm to avoid races. + */ + down_write(&mm->mmap_sem); + if (mm->locked_vm) { vma = mm->mmap; while (vma) { @@ -2786,6 +2846,11 @@ void exit_mmap(struct mm_struct *mm) vma = remove_vma(vma); } vm_unacct_memory(nr_accounted); + + mm->mmap = NULL; + mm->mm_rb = RB_ROOT; + vmacache_invalidate(mm); + up_write(&mm->mmap_sem); } /* Insert vm structure into process list sorted by address @@ -2895,6 +2960,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, new_vma->vm_ops->open(new_vma); vma_link(mm, new_vma, prev, rb_link, rb_parent); *need_rmap_locks = false; + uksm_vma_add_new(new_vma); } return new_vma; @@ -2960,9 +3026,19 @@ static const char *special_mapping_name(struct vm_area_struct *vma) return ((struct vm_special_mapping *)vma->vm_private_data)->name; } +static int special_mapping_mremap(struct vm_area_struct *new_vma) +{ + struct vm_special_mapping *sm = new_vma->vm_private_data; + + if (sm->mremap) + return sm->mremap(sm, new_vma); + return 0; +} + static const struct vm_operations_struct special_mapping_vmops = { .close = special_mapping_close, .fault = special_mapping_fault, + .mremap = special_mapping_mremap, .name = special_mapping_name, }; @@ -3032,6 +3108,7 @@ static struct vm_area_struct *__install_special_mapping( vm_stat_account(mm, vma->vm_flags, len >> PAGE_SHIFT); perf_event_mmap(vma); + uksm_vma_add_new(vma); return vma; |