From 376027f2a3888ef3dec73ee41f85d90e51162d78 Mon Sep 17 00:00:00 2001
From: André Fabian Silva Delgado
Date: Tue, 8 Sep 2015 11:24:16 -0300
Subject: Add TuxOnIce support

---
 mm/debug.c      |  6 ++++++
 mm/page_alloc.c | 12 +++++++++++
 mm/percpu.c     | 18 ++++++++++++++++
 mm/shmem.c      | 31 +++++++++++++++-------------
 mm/slub.c       |  4 ++--
 mm/swapfile.c   | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 mm/vmscan.c     | 23 +++++++++++++++++++--
 7 files changed, 138 insertions(+), 20 deletions(-)

diff --git a/mm/debug.c b/mm/debug.c
index 76089ddf9..689f5e4b1 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -48,6 +48,12 @@ static const struct trace_print_flags pageflag_names[] = {
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
         {1UL << PG_compound_lock,       "compound_lock" },
 #endif
+#ifdef CONFIG_TOI_INCREMENTAL
+        {1UL << PG_toi_untracked,       "toi_untracked" },
+        {1UL << PG_toi_ro,              "toi_ro"        },
+        {1UL << PG_toi_cbw,             "toi_cbw"       },
+        {1UL << PG_toi_dirty,           "toi_dirty"     },
+#endif
 };

 static void dump_flags(unsigned long flags,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5b5240b7f..3a167b135 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -61,6 +61,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
@@ -726,6 +727,12 @@ static inline int free_pages_check(struct page *page)
         if (unlikely(page->mem_cgroup))
                 bad_reason = "page still charged to cgroup";
 #endif
+        if (unlikely(PageTOI_Untracked(page))) {
+                // Make it writable and included in image if allocated.
+                ClearPageTOI_Untracked(page);
+                // If it gets allocated, it will be dirty from TOI's POV.
+                SetPageTOI_Dirty(page);
+        }
         if (unlikely(bad_reason)) {
                 bad_page(page, bad_reason, bad_flags);
                 return 1;
@@ -1324,6 +1331,11 @@ static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
                 struct page *p = page + i;
                 if (unlikely(check_new_page(p)))
                         return 1;
+                if (unlikely(toi_incremental_support() && gfp_flags & ___GFP_TOI_NOTRACK)) {
+                        // Make the page writable if it's protected, and set it to be untracked.
+                        SetPageTOI_Untracked(p);
+                        toi_make_writable(init_mm.pgd, (unsigned long) page_address(p));
+                }
         }

         set_page_private(page, 0);
diff --git a/mm/percpu.c b/mm/percpu.c
index 2dd74487a..b4fe24569 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -125,6 +125,7 @@ static int pcpu_nr_units __read_mostly;
 static int pcpu_atom_size __read_mostly;
 static int pcpu_nr_slots __read_mostly;
 static size_t pcpu_chunk_struct_size __read_mostly;
+static int pcpu_pfns;

 /* cpus with the lowest and highest unit addresses */
 static unsigned int pcpu_low_unit_cpu __read_mostly;
@@ -1795,6 +1796,7 @@ static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
         /* calculate size_sum and ensure dyn_size is enough for early alloc */
         size_sum = PFN_ALIGN(static_size + reserved_size +
                              max_t(size_t, dyn_size, PERCPU_DYNAMIC_EARLY_SIZE));
+        pcpu_pfns = PFN_DOWN(size_sum);
         dyn_size = size_sum - static_size - reserved_size;

         /*
@@ -2282,6 +2284,22 @@ void __init percpu_init_late(void)
         }
 }

+#ifdef CONFIG_TOI_INCREMENTAL
+/*
+ * It doesn't matter if we mark an extra page as untracked (and therefore
+ * always save it in incremental images).
+ */
+void toi_mark_per_cpus_pages_untracked(void)
+{
+        int i;
+
+        struct page *page = virt_to_page(pcpu_base_addr);
+
+        for (i = 0; i < pcpu_pfns; i++)
+                SetPageTOI_Untracked(page + i);
+}
+#endif
+
 /*
  * Percpu allocator is initialized early during boot when neither slab or
  * workqueue is available.  Plug async management until everything is up
  * and running.
  */
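The hunks above only add the new TOI page flags and keep them consistent: freed pages lose PG_toi_untracked and are marked dirty, pages allocated with ___GFP_TOI_NOTRACK are made writable and untracked, and the per-cpu area is always treated as untracked. As a rough sketch of how an incremental image writer might consume these flags (the helper below is hypothetical and assumes the usual Page*/SetPage* accessors are generated for the new flag bits):

#include <linux/mm.h>
#include <linux/page-flags.h>

/*
 * Hypothetical helper, not part of this patch: decide whether a page must
 * be written into the next incremental image.
 */
static bool toi_page_needs_saving(struct page *page)
{
        /* Untracked pages are never write-protected, so always copy them. */
        if (PageTOI_Untracked(page))
                return true;

        /*
         * Tracked pages only need saving when dirtied since the last image;
         * free_pages_check() above marks freed untracked pages dirty so they
         * are picked up again once reallocated.
         */
        return PageTOI_Dirty(page);
}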
diff --git a/mm/shmem.c b/mm/shmem.c
index dbe0c1e83..d88bf98f0 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1396,7 +1396,7 @@ static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
 }

 static struct inode *shmem_get_inode(struct super_block *sb, const struct inode *dir,
-                                     umode_t mode, dev_t dev, unsigned long flags)
+                                     umode_t mode, dev_t dev, unsigned long flags, int atomic_copy)
 {
         struct inode *inode;
         struct shmem_inode_info *info;
@@ -1417,6 +1417,8 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
                 spin_lock_init(&info->lock);
                 info->seals = F_SEAL_SEAL;
                 info->flags = flags & VM_NORESERVE;
+                if (atomic_copy)
+                        inode->i_flags |= S_ATOMIC_COPY;
                 INIT_LIST_HEAD(&info->swaplist);
                 simple_xattrs_init(&info->xattrs);
                 cache_no_acl(inode);
@@ -2206,7 +2208,7 @@ shmem_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
         struct inode *inode;
         int error = -ENOSPC;

-        inode = shmem_get_inode(dir->i_sb, dir, mode, dev, VM_NORESERVE);
+        inode = shmem_get_inode(dir->i_sb, dir, mode, dev, VM_NORESERVE, 0);
         if (inode) {
                 error = simple_acl_create(dir, inode);
                 if (error)
@@ -2235,7 +2237,7 @@ shmem_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
         struct inode *inode;
         int error = -ENOSPC;

-        inode = shmem_get_inode(dir->i_sb, dir, mode, 0, VM_NORESERVE);
+        inode = shmem_get_inode(dir->i_sb, dir, mode, 0, VM_NORESERVE, 0);
         if (inode) {
                 error = security_inode_init_security(inode, dir,
                                                      NULL,
@@ -2428,7 +2430,7 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
         if (len > PAGE_CACHE_SIZE)
                 return -ENAMETOOLONG;

-        inode = shmem_get_inode(dir->i_sb, dir, S_IFLNK|S_IRWXUGO, 0, VM_NORESERVE);
+        inode = shmem_get_inode(dir->i_sb, dir, S_IFLNK|S_IRWXUGO, 0, VM_NORESERVE, 0);
         if (!inode)
                 return -ENOSPC;
@@ -2946,7 +2948,7 @@ SYSCALL_DEFINE2(memfd_create,
                 goto err_name;
         }

-        file = shmem_file_setup(name, 0, VM_NORESERVE);
+        file = shmem_file_setup(name, 0, VM_NORESERVE, 0);
         if (IS_ERR(file)) {
                 error = PTR_ERR(file);
                 goto err_fd;
@@ -3037,7 +3039,7 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent)
         sb->s_flags |= MS_POSIXACL;
 #endif

-        inode = shmem_get_inode(sb, NULL, S_IFDIR | sbinfo->mode, 0, VM_NORESERVE);
+        inode = shmem_get_inode(sb, NULL, S_IFDIR | sbinfo->mode, 0, VM_NORESERVE, 0);
         if (!inode)
                 goto failed;
         inode->i_uid = sbinfo->uid;
@@ -3291,7 +3293,7 @@ EXPORT_SYMBOL_GPL(shmem_truncate_range);
 #define shmem_vm_ops                            generic_file_vm_ops
 #define shmem_file_operations                   ramfs_file_operations
-#define shmem_get_inode(sb, dir, mode, dev, flags)      ramfs_get_inode(sb, dir, mode, dev)
+#define shmem_get_inode(sb, dir, mode, dev, flags, atomic_copy)        ramfs_get_inode(sb, dir, mode, dev)
 #define shmem_acct_size(flags, size)            0
 #define shmem_unacct_size(flags, size)          do {} while (0)

@@ -3304,7 +3306,8 @@ static struct dentry_operations anon_ops = {
 };

 static struct file *__shmem_file_setup(const char *name, loff_t size,
-                                       unsigned long flags, unsigned int i_flags)
+                                       unsigned long flags, unsigned int i_flags,
+                                       int atomic_copy)
 {
         struct file *res;
         struct inode *inode;
@@ -3333,7 +3336,7 @@ static struct file *__shmem_file_setup(const char *name, loff_t size,
         d_set_d_op(path.dentry, &anon_ops);

         res = ERR_PTR(-ENOSPC);
-        inode = shmem_get_inode(sb, NULL, S_IFREG | S_IRWXUGO, 0, flags);
+        inode = shmem_get_inode(sb, NULL, S_IFREG | S_IRWXUGO, 0, flags, atomic_copy);
         if (!inode)
                 goto put_memory;
@@ -3369,9 +3372,9 @@ put_path:
 * @size: size to be set for the file
 * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
 */
-struct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned long flags)
+struct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned long flags, int atomic_copy)
 {
-        return __shmem_file_setup(name, size, flags, S_PRIVATE);
+        return __shmem_file_setup(name, size, flags, S_PRIVATE, atomic_copy);
 }

 /**
@@ -3380,9 +3383,9 @@ struct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned lon
 * @size: size to be set for the file
 * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
 */
-struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags)
+struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags, int atomic_copy)
 {
-        return __shmem_file_setup(name, size, flags, 0);
+        return __shmem_file_setup(name, size, flags, 0, atomic_copy);
 }
 EXPORT_SYMBOL_GPL(shmem_file_setup);

@@ -3401,7 +3404,7 @@ int shmem_zero_setup(struct vm_area_struct *vma)
          * accessible to the user through its mapping, use S_PRIVATE flag to
          * bypass file security, in the same way as shmem_kernel_file_setup().
          */
-        file = __shmem_file_setup("dev/zero", size, vma->vm_flags, S_PRIVATE);
+        file = __shmem_file_setup("dev/zero", size, vma->vm_flags, S_PRIVATE, 0);
         if (IS_ERR(file))
                 return PTR_ERR(file);
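Every existing shmem caller passes 0 for the new atomic_copy argument; a non-zero value makes shmem_get_inode() set S_ATOMIC_COPY on the inode, presumably so TuxOnIce handles that file's pages as part of the atomic copy. A minimal sketch of a caller against the new signature (the function and file name below are hypothetical):

#include <linux/shmem_fs.h>
#include <linux/mm.h>

/*
 * Hypothetical caller, not part of this patch: create a tmpfs-backed file
 * whose inode carries S_ATOMIC_COPY.
 */
static struct file *toi_example_scratch_file(loff_t size)
{
        /* The trailing 1 is the new atomic_copy argument; normal users pass 0. */
        return shmem_file_setup("toi_scratch", size, VM_NORESERVE, 1);
}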
diff --git a/mm/slub.c b/mm/slub.c
index f68c0e50f..b806d8d12 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1315,7 +1315,7 @@ static inline struct page *alloc_slab_page(struct kmem_cache *s,
         struct page *page;
         int order = oo_order(oo);

-        flags |= __GFP_NOTRACK;
+        flags |= (__GFP_NOTRACK | ___GFP_TOI_NOTRACK);

         if (memcg_charge_slab(s, flags, order))
                 return NULL;
@@ -3331,7 +3331,7 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
         struct page *page;
         void *ptr = NULL;

-        flags |= __GFP_COMP | __GFP_NOTRACK;
+        flags |= __GFP_COMP | __GFP_NOTRACK | __GFP_TOI_NOTRACK;
         page = alloc_kmem_pages_node(node, flags, get_order(size));
         if (page)
                 ptr = page_address(page);
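Slab and large-kmalloc pages are now allocated with the TOI no-track bit, so prep_new_page() (see the mm/page_alloc.c hunk above) marks them PG_toi_untracked and keeps them writable. Any other allocation site could opt out of change tracking the same way; a minimal sketch, assuming ___GFP_TOI_NOTRACK is defined in the gfp headers elsewhere in this series:

#include <linux/gfp.h>

/*
 * Hypothetical helper, not part of this patch: allocate a single page that
 * TuxOnIce's incremental mode will not write-protect or track, just as
 * alloc_slab_page() above does for slab pages.
 */
static struct page *alloc_untracked_page(gfp_t gfp_mask)
{
        /*
         * prep_new_page() tests toi_incremental_support() and this bit, then
         * sets PG_toi_untracked and makes the page writable.
         */
        return alloc_pages(gfp_mask | ___GFP_TOI_NOTRACK, 0);
}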
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 41e4581af..3ce3f2978 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -9,6 +9,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -43,7 +44,6 @@ static bool swap_count_continued(struct swap_info_struct *, pgoff_t,
                                  unsigned char);
 static void free_swap_count_continuations(struct swap_info_struct *);
-static sector_t map_swap_entry(swp_entry_t, struct block_device**);

 DEFINE_SPINLOCK(swap_lock);
 static unsigned int nr_swapfiles;
@@ -722,6 +722,60 @@ swp_entry_t get_swap_page_of_type(int type)
         return (swp_entry_t) {0};
 }

+static unsigned int find_next_to_unuse(struct swap_info_struct *si,
+                                       unsigned int prev, bool frontswap);
+
+void get_swap_range_of_type(int type, swp_entry_t *start, swp_entry_t *end,
+                            unsigned int limit)
+{
+        struct swap_info_struct *si;
+        pgoff_t start_at;
+        unsigned int i;
+
+        *start = swp_entry(0, 0);
+        *end = swp_entry(0, 0);
+        si = swap_info[type];
+        spin_lock(&si->lock);
+        if (si && (si->flags & SWP_WRITEOK)) {
+                atomic_long_dec(&nr_swap_pages);
+                /* This is called for allocating swap entry, not cache */
+                start_at = scan_swap_map(si, 1);
+                if (start_at) {
+                        unsigned long stop_at = find_next_to_unuse(si, start_at, 0);
+                        if (stop_at > start_at)
+                                stop_at--;
+                        else
+                                stop_at = si->max - 1;
+                        if (stop_at - start_at + 1 > limit)
+                                stop_at = min_t(unsigned int,
+                                                start_at + limit - 1,
+                                                si->max - 1);
+                        /* Mark them used */
+                        for (i = start_at; i <= stop_at; i++)
+                                si->swap_map[i] = 1;
+                        /* first page already done above */
+                        si->inuse_pages += stop_at - start_at;
+
+                        atomic_long_sub(stop_at - start_at, &nr_swap_pages);
+                        if (start_at == si->lowest_bit)
+                                si->lowest_bit = stop_at + 1;
+                        if (stop_at == si->highest_bit)
+                                si->highest_bit = start_at - 1;
+                        if (si->inuse_pages == si->pages) {
+                                si->lowest_bit = si->max;
+                                si->highest_bit = 0;
+                        }
+                        for (i = start_at + 1; i <= stop_at; i++)
+                                inc_cluster_info_page(si, si->cluster_info, i);
+                        si->cluster_next = stop_at + 1;
+                        *start = swp_entry(type, start_at);
+                        *end = swp_entry(type, stop_at);
+                } else
+                        atomic_long_inc(&nr_swap_pages);
+        }
+        spin_unlock(&si->lock);
+}
+
 static struct swap_info_struct *swap_info_get(swp_entry_t entry)
 {
         struct swap_info_struct *p;
@@ -1576,7 +1630,7 @@ static void drain_mmlist(void)
 * Note that the type of this function is sector_t, but it returns page offset
 * into the bdev, not sector offset.
 */
-static sector_t map_swap_entry(swp_entry_t entry, struct block_device **bdev)
+sector_t map_swap_entry(swp_entry_t entry, struct block_device **bdev)
 {
         struct swap_info_struct *sis;
         struct swap_extent *start_se;
@@ -2721,8 +2775,14 @@ pgoff_t __page_file_index(struct page *page)
         VM_BUG_ON_PAGE(!PageSwapCache(page), page);
         return swp_offset(swap);
 }
+
 EXPORT_SYMBOL_GPL(__page_file_index);

+struct swap_info_struct *get_swap_info_struct(unsigned type)
+{
+        return swap_info[type];
+}
+
 /*
 * add_swap_count_continuation - called when a swap count is duplicated
 * beyond SWAP_MAP_MAX, it allocates a new page and links that to the entry's
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 8286938c7..74eb7a5e7 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1446,7 +1446,7 @@ static int too_many_isolated(struct zone *zone, int file,
 {
         unsigned long inactive, isolated;

-        if (current_is_kswapd())
+        if (current_is_kswapd() || sc->hibernation_mode)
                 return 0;

         if (!sane_reclaim(sc))
@@ -2295,6 +2295,9 @@ static inline bool should_continue_reclaim(struct zone *zone,
         unsigned long pages_for_compaction;
         unsigned long inactive_lru_pages;

+        if (nr_reclaimed && nr_scanned && sc->nr_to_reclaim >= sc->nr_reclaimed)
+                return true;
+
         /* If not in reclaim/compaction mode, stop */
         if (!in_reclaim_compaction(sc))
                 return false;
@@ -2605,6 +2608,12 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
         unsigned long total_scanned = 0;
         unsigned long writeback_threshold;
         bool zones_reclaimable;
+
+#ifdef CONFIG_FREEZER
+        if (unlikely(pm_freezing && !sc->hibernation_mode))
+                return 0;
+#endif
+
 retry:
         delayacct_freepages_start();
@@ -3488,6 +3497,11 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
         if (!populated_zone(zone))
                 return;

+#ifdef CONFIG_FREEZER
+        if (pm_freezing)
+                return;
+#endif
+
         if (!cpuset_zone_allowed(zone, GFP_KERNEL | __GFP_HARDWALL))
                 return;
         pgdat = zone->zone_pgdat;
@@ -3513,7 +3527,7 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
 * LRU order by reclaiming preferentially
 * inactive > active > active referenced > active mapped
 */
-unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
+unsigned long shrink_memory_mask(unsigned long nr_to_reclaim, gfp_t mask)
 {
         struct reclaim_state reclaim_state;
         struct scan_control sc = {
@@ -3542,6 +3556,11 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
         return nr_reclaimed;
 }
+
+unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
+{
+        return shrink_memory_mask(nr_to_reclaim, GFP_HIGHUSER_MOVABLE);
+}
 #endif /* CONFIG_HIBERNATION */

 /* It's optimal to keep kswapds on the same
    CPUs as their memory, but not required for correctness.  So if the last
    cpu in a node goes away, we get changed to run anywhere: as the first one
    comes back, restore their cpu bindings. */
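The symbols exported above (get_swap_range_of_type(), get_swap_info_struct(), the now non-static map_swap_entry(), and shrink_memory_mask()) are consumed by the TuxOnIce core outside mm/. A rough, hypothetical illustration of how two of them could fit together when preparing an image; none of this caller is in the patch, and the declarations are assumed to be exported through headers elsewhere in the series:

#include <linux/gfp.h>
#include <linux/swap.h>
#include <linux/swapops.h>

/*
 * Hypothetical caller, not part of this patch: free some memory, then
 * reserve up to 'limit' contiguous swap slots on swap device 'type'.
 */
static long toi_example_reserve_storage(int type, unsigned int limit)
{
        swp_entry_t first, last;

        /*
         * Reclaim with a GFP_KERNEL mask rather than the GFP_HIGHUSER_MOVABLE
         * default that shrink_all_memory() now passes to shrink_memory_mask().
         */
        shrink_memory_mask(limit, GFP_KERNEL);

        get_swap_range_of_type(type, &first, &last, limit);
        if (!swp_offset(first))
                return -ENOSPC; /* scan_swap_map() found nothing */

        return swp_offset(last) - swp_offset(first) + 1;
}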