Diffstat (limited to 'mm')
-rw-r--r--   mm/debug.c        6
-rw-r--r--   mm/page_alloc.c  12
-rw-r--r--   mm/percpu.c      18
-rw-r--r--   mm/shmem.c       31
-rw-r--r--   mm/slub.c         4
-rw-r--r--   mm/swapfile.c    64
-rw-r--r--   mm/vmscan.c      23
7 files changed, 138 insertions, 20 deletions
diff --git a/mm/debug.c b/mm/debug.c
index f05b2d5d6..5c6da0ffd 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -40,6 +40,12 @@ static const struct trace_print_flags pageflag_names[] = {
#ifdef CONFIG_MEMORY_FAILURE
{1UL << PG_hwpoison, "hwpoison" },
#endif
+#ifdef CONFIG_TOI_INCREMENTAL
+ {1UL << PG_toi_untracked, "toi_untracked" },
+ {1UL << PG_toi_ro, "toi_ro" },
+ {1UL << PG_toi_cbw, "toi_cbw" },
+ {1UL << PG_toi_dirty, "toi_dirty" },
+#endif
#if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT)
{1UL << PG_young, "young" },
{1UL << PG_idle, "idle" },
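
These entries only name the new TuxOnIce page flags so that dump_page() can print them; the pageflag_names[] table is walked bit by bit whenever a page is dumped. A minimal sketch of that walk, assuming the kernel's struct trace_print_flags and the PG_toi_* bits added elsewhere in this series (the helper is illustrative, not mm/debug.c's actual dump code):

/* Illustrative walk of a pageflag_names[]-style table; not the exact
 * dump_page() implementation. */
static void sketch_dump_page_flags(const struct page *page,
				   const struct trace_print_flags *names,
				   unsigned long count)
{
	unsigned long i;

	pr_alert("page flags: %#lx", page->flags);
	for (i = 0; i < count; i++)
		if (page->flags & names[i].mask)
			pr_cont(" %s", names[i].name);
	pr_cont("\n");
}
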
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 838ca8bb6..58bc48250 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -62,6 +62,7 @@
#include <linux/hugetlb.h>
#include <linux/sched/rt.h>
#include <linux/page_owner.h>
+#include <linux/tuxonice.h>
#include <linux/kthread.h>
#include <asm/sections.h>
@@ -751,6 +752,12 @@ static inline int free_pages_check(struct page *page)
if (unlikely(page->mem_cgroup))
bad_reason = "page still charged to cgroup";
#endif
+ if (unlikely(PageTOI_Untracked(page))) {
+ // Make it writable and included in image if allocated.
+ ClearPageTOI_Untracked(page);
+ // If it gets allocated, it will be dirty from TOI's POV.
+ SetPageTOI_Dirty(page);
+ }
if (unlikely(bad_reason)) {
bad_page(page, bad_reason, bad_flags);
return 1;
@@ -1390,6 +1397,11 @@ static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
struct page *p = page + i;
if (unlikely(check_new_page(p)))
return 1;
+ if (unlikely(toi_incremental_support() && gfp_flags & ___GFP_TOI_NOTRACK)) {
+ // Make the page writable if it's protected, and set it to be untracked.
+ SetPageTOI_Untracked(p);
+ toi_make_writable(init_mm.pgd, (unsigned long) page_address(p));
+ }
}
set_page_private(page, 0);
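
The two page_alloc.c hunks work as a pair: prep_new_page() honours the new GFP bit by marking freshly allocated pages untracked (restoring write access if TuxOnIce had write-protected them), while free_pages_check() drops the untracked state on free so the next owner starts out dirty from TuxOnIce's point of view. A hedged sketch of an allocation that opts out of tracking; __GFP_TOI_NOTRACK is assumed to be the typed flag added by the gfp.h side of this series (mm/slub.c below uses it), and the helper itself is hypothetical:

#include <linux/gfp.h>

/* Hypothetical caller: pages allocated this way are flagged
 * PG_toi_untracked in prep_new_page() and are therefore never
 * write-protected for copy-before-write. */
static struct page *toi_alloc_untracked(gfp_t gfp_mask, unsigned int order)
{
	return alloc_pages(gfp_mask | __GFP_TOI_NOTRACK, order);
}
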
diff --git a/mm/percpu.c b/mm/percpu.c
index 998607adf..2f040d0b8 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -125,6 +125,7 @@ static int pcpu_nr_units __read_mostly;
static int pcpu_atom_size __read_mostly;
static int pcpu_nr_slots __read_mostly;
static size_t pcpu_chunk_struct_size __read_mostly;
+static int pcpu_pfns;
/* cpus with the lowest and highest unit addresses */
static unsigned int pcpu_low_unit_cpu __read_mostly;
@@ -1790,6 +1791,7 @@ static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
/* calculate size_sum and ensure dyn_size is enough for early alloc */
size_sum = PFN_ALIGN(static_size + reserved_size +
max_t(size_t, dyn_size, PERCPU_DYNAMIC_EARLY_SIZE));
+ pcpu_pfns = PFN_DOWN(size_sum);
dyn_size = size_sum - static_size - reserved_size;
/*
@@ -2277,6 +2279,22 @@ void __init percpu_init_late(void)
}
}
+#ifdef CONFIG_TOI_INCREMENTAL
+/*
+ * It doesn't matter if we mark an extra page as untracked (and therefore
+ * always save it in incremental images).
+ */
+void toi_mark_per_cpus_pages_untracked(void)
+{
+ int i;
+
+ struct page *page = virt_to_page(pcpu_base_addr);
+
+ for (i = 0; i < pcpu_pfns; i++)
+ SetPageTOI_Untracked(page + i);
+}
+#endif
+
/*
* Percpu allocator is initialized early during boot when neither slab or
* workqueue is available. Plug async management until everything is up
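
pcpu_pfns records how many page frames the first-chunk allocation spans, so toi_mark_per_cpus_pages_untracked() can flag every one of them: per-cpu data changes constantly and is cheaper to save in full than to write-protect. The same pattern applies to any directly mapped, contiguous kernel region; a sketch under the assumption that SetPageTOI_Untracked() from this series is available (the helper name is hypothetical):

#include <linux/mm.h>
#include <linux/pfn.h>

/* Hypothetical helper mirroring toi_mark_per_cpus_pages_untracked():
 * mark each page backing a contiguous, directly mapped region as
 * untracked, i.e. always saved in incremental images. */
static void toi_mark_region_untracked(void *start, size_t size)
{
	struct page *page = virt_to_page(start);
	unsigned long i, nr = PFN_UP(size);

	for (i = 0; i < nr; i++)
		SetPageTOI_Untracked(page + i);
}
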
diff --git a/mm/shmem.c b/mm/shmem.c
index 440e2a7e6..f64ab5f8c 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1500,7 +1500,7 @@ static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
}
static struct inode *shmem_get_inode(struct super_block *sb, const struct inode *dir,
- umode_t mode, dev_t dev, unsigned long flags)
+ umode_t mode, dev_t dev, unsigned long flags, int atomic_copy)
{
struct inode *inode;
struct shmem_inode_info *info;
@@ -1521,6 +1521,8 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
spin_lock_init(&info->lock);
info->seals = F_SEAL_SEAL;
info->flags = flags & VM_NORESERVE;
+ if (atomic_copy)
+ inode->i_flags |= S_ATOMIC_COPY;
INIT_LIST_HEAD(&info->swaplist);
simple_xattrs_init(&info->xattrs);
cache_no_acl(inode);
@@ -2310,7 +2312,7 @@ shmem_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
struct inode *inode;
int error = -ENOSPC;
- inode = shmem_get_inode(dir->i_sb, dir, mode, dev, VM_NORESERVE);
+ inode = shmem_get_inode(dir->i_sb, dir, mode, dev, VM_NORESERVE, 0);
if (inode) {
error = simple_acl_create(dir, inode);
if (error)
@@ -2339,7 +2341,7 @@ shmem_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
struct inode *inode;
int error = -ENOSPC;
- inode = shmem_get_inode(dir->i_sb, dir, mode, 0, VM_NORESERVE);
+ inode = shmem_get_inode(dir->i_sb, dir, mode, 0, VM_NORESERVE, 0);
if (inode) {
error = security_inode_init_security(inode, dir,
NULL,
@@ -2531,7 +2533,7 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
if (len > PAGE_CACHE_SIZE)
return -ENAMETOOLONG;
- inode = shmem_get_inode(dir->i_sb, dir, S_IFLNK|S_IRWXUGO, 0, VM_NORESERVE);
+ inode = shmem_get_inode(dir->i_sb, dir, S_IFLNK|S_IRWXUGO, 0, VM_NORESERVE, 0);
if (!inode)
return -ENOSPC;
@@ -3010,7 +3012,7 @@ SYSCALL_DEFINE2(memfd_create,
goto err_name;
}
- file = shmem_file_setup(name, 0, VM_NORESERVE);
+ file = shmem_file_setup(name, 0, VM_NORESERVE, 0);
if (IS_ERR(file)) {
error = PTR_ERR(file);
goto err_fd;
@@ -3101,7 +3103,7 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent)
sb->s_flags |= MS_POSIXACL;
#endif
- inode = shmem_get_inode(sb, NULL, S_IFDIR | sbinfo->mode, 0, VM_NORESERVE);
+ inode = shmem_get_inode(sb, NULL, S_IFDIR | sbinfo->mode, 0, VM_NORESERVE, 0);
if (!inode)
goto failed;
inode->i_uid = sbinfo->uid;
@@ -3357,7 +3359,7 @@ EXPORT_SYMBOL_GPL(shmem_truncate_range);
#define shmem_vm_ops generic_file_vm_ops
#define shmem_file_operations ramfs_file_operations
-#define shmem_get_inode(sb, dir, mode, dev, flags) ramfs_get_inode(sb, dir, mode, dev)
+#define shmem_get_inode(sb, dir, mode, dev, flags, atomic_copy) ramfs_get_inode(sb, dir, mode, dev)
#define shmem_acct_size(flags, size) 0
#define shmem_unacct_size(flags, size) do {} while (0)
@@ -3370,7 +3372,8 @@ static struct dentry_operations anon_ops = {
};
static struct file *__shmem_file_setup(const char *name, loff_t size,
- unsigned long flags, unsigned int i_flags)
+ unsigned long flags, unsigned int i_flags,
+ int atomic_copy)
{
struct file *res;
struct inode *inode;
@@ -3399,7 +3402,7 @@ static struct file *__shmem_file_setup(const char *name, loff_t size,
d_set_d_op(path.dentry, &anon_ops);
res = ERR_PTR(-ENOSPC);
- inode = shmem_get_inode(sb, NULL, S_IFREG | S_IRWXUGO, 0, flags);
+ inode = shmem_get_inode(sb, NULL, S_IFREG | S_IRWXUGO, 0, flags, atomic_copy);
if (!inode)
goto put_memory;
@@ -3435,9 +3438,9 @@ put_path:
* @size: size to be set for the file
* @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
*/
-struct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned long flags)
+struct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned long flags, int atomic_copy)
{
- return __shmem_file_setup(name, size, flags, S_PRIVATE);
+ return __shmem_file_setup(name, size, flags, S_PRIVATE, atomic_copy);
}
/**
@@ -3446,9 +3449,9 @@ struct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned lon
* @size: size to be set for the file
* @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
*/
-struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags)
+struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags, int atomic_copy)
{
- return __shmem_file_setup(name, size, flags, 0);
+ return __shmem_file_setup(name, size, flags, 0, atomic_copy);
}
EXPORT_SYMBOL_GPL(shmem_file_setup);
@@ -3467,7 +3470,7 @@ int shmem_zero_setup(struct vm_area_struct *vma)
* accessible to the user through its mapping, use S_PRIVATE flag to
* bypass file security, in the same way as shmem_kernel_file_setup().
*/
- file = __shmem_file_setup("dev/zero", size, vma->vm_flags, S_PRIVATE);
+ file = __shmem_file_setup("dev/zero", size, vma->vm_flags, S_PRIVATE, 0);
if (IS_ERR(file))
return PTR_ERR(file);
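
Every shmem_get_inode()/shmem_file_setup() caller now threads through an atomic_copy argument; a non-zero value marks the inode S_ATOMIC_COPY so TuxOnIce copies its pages atomically instead of treating them as ordinary page cache. Existing callers pass 0 and keep their current behaviour. A sketch of the kind of caller the new argument enables, assuming the updated shmem_file_setup() signature above (the file name and helper are hypothetical):

#include <linux/shmem_fs.h>
#include <linux/mm.h>

/* Hypothetical: a kernel-internal tmpfs file whose contents must be
 * captured atomically in the hibernation image. */
static struct file *toi_atomic_shmem_file(loff_t size)
{
	/* Final argument: atomic_copy != 0 sets S_ATOMIC_COPY on the inode. */
	return shmem_file_setup("toi_atomic", size, VM_NORESERVE, 1);
}
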
diff --git a/mm/slub.c b/mm/slub.c
index d8fbd4a6e..5328e64b9 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1379,7 +1379,7 @@ static inline struct page *alloc_slab_page(struct kmem_cache *s,
struct page *page;
int order = oo_order(oo);
- flags |= __GFP_NOTRACK;
+ flags |= (__GFP_NOTRACK | ___GFP_TOI_NOTRACK);
if (node == NUMA_NO_NODE)
page = alloc_pages(flags, order);
@@ -3543,7 +3543,7 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
struct page *page;
void *ptr = NULL;
- flags |= __GFP_COMP | __GFP_NOTRACK;
+ flags |= __GFP_COMP | __GFP_NOTRACK | __GFP_TOI_NOTRACK;
page = alloc_kmem_pages_node(node, flags, get_order(size));
if (page)
ptr = page_address(page);
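
Tagging slab backing pages with the TOI no-track flag means prep_new_page() (see the page_alloc.c hunk above) marks them untracked as soon as they are allocated: slab memory is recycled far too often for write-protection to pay off. One way to observe the effect, assuming CONFIG_TOI_INCREMENTAL is enabled and toi_incremental_support() returns true (the check is illustrative, not part of the patch):

#include <linux/slab.h>
#include <linux/mm.h>

/* Illustrative only: after this change the page backing a slab object
 * should normally carry PG_toi_untracked. */
static void toi_show_slab_tracking(void)
{
	void *obj = kmalloc(64, GFP_KERNEL);

	if (obj) {
		pr_info("slab page untracked: %d\n",
			PageTOI_Untracked(virt_to_head_page(obj)));
		kfree(obj);
	}
}
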
diff --git a/mm/swapfile.c b/mm/swapfile.c
index d2c37365e..98d348347 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -9,6 +9,7 @@
#include <linux/hugetlb.h>
#include <linux/mman.h>
#include <linux/slab.h>
+#include <linux/export.h>
#include <linux/kernel_stat.h>
#include <linux/swap.h>
#include <linux/vmalloc.h>
@@ -43,7 +44,6 @@
static bool swap_count_continued(struct swap_info_struct *, pgoff_t,
unsigned char);
static void free_swap_count_continuations(struct swap_info_struct *);
-static sector_t map_swap_entry(swp_entry_t, struct block_device**);
DEFINE_SPINLOCK(swap_lock);
static unsigned int nr_swapfiles;
@@ -719,6 +719,60 @@ swp_entry_t get_swap_page_of_type(int type)
return (swp_entry_t) {0};
}
+static unsigned int find_next_to_unuse(struct swap_info_struct *si,
+ unsigned int prev, bool frontswap);
+
+void get_swap_range_of_type(int type, swp_entry_t *start, swp_entry_t *end,
+ unsigned int limit)
+{
+ struct swap_info_struct *si;
+ pgoff_t start_at;
+ unsigned int i;
+
+ *start = swp_entry(0, 0);
+ *end = swp_entry(0, 0);
+ si = swap_info[type];
+ spin_lock(&si->lock);
+ if (si && (si->flags & SWP_WRITEOK)) {
+ atomic_long_dec(&nr_swap_pages);
+ /* This is called for allocating swap entry, not cache */
+ start_at = scan_swap_map(si, 1);
+ if (start_at) {
+ unsigned long stop_at = find_next_to_unuse(si, start_at, 0);
+ if (stop_at > start_at)
+ stop_at--;
+ else
+ stop_at = si->max - 1;
+ if (stop_at - start_at + 1 > limit)
+ stop_at = min_t(unsigned int,
+ start_at + limit - 1,
+ si->max - 1);
+ /* Mark them used */
+ for (i = start_at; i <= stop_at; i++)
+ si->swap_map[i] = 1;
+ /* first page already done above */
+ si->inuse_pages += stop_at - start_at;
+
+ atomic_long_sub(stop_at - start_at, &nr_swap_pages);
+ if (start_at == si->lowest_bit)
+ si->lowest_bit = stop_at + 1;
+ if (stop_at == si->highest_bit)
+ si->highest_bit = start_at - 1;
+ if (si->inuse_pages == si->pages) {
+ si->lowest_bit = si->max;
+ si->highest_bit = 0;
+ }
+ for (i = start_at + 1; i <= stop_at; i++)
+ inc_cluster_info_page(si, si->cluster_info, i);
+ si->cluster_next = stop_at + 1;
+ *start = swp_entry(type, start_at);
+ *end = swp_entry(type, stop_at);
+ } else
+ atomic_long_inc(&nr_swap_pages);
+ }
+ spin_unlock(&si->lock);
+}
+
static struct swap_info_struct *swap_info_get(swp_entry_t entry)
{
struct swap_info_struct *p;
@@ -1607,7 +1661,7 @@ static void drain_mmlist(void)
* Note that the type of this function is sector_t, but it returns page offset
* into the bdev, not sector offset.
*/
-static sector_t map_swap_entry(swp_entry_t entry, struct block_device **bdev)
+sector_t map_swap_entry(swp_entry_t entry, struct block_device **bdev)
{
struct swap_info_struct *sis;
struct swap_extent *start_se;
@@ -2738,8 +2792,14 @@ pgoff_t __page_file_index(struct page *page)
VM_BUG_ON_PAGE(!PageSwapCache(page), page);
return swp_offset(swap);
}
+
EXPORT_SYMBOL_GPL(__page_file_index);
+struct swap_info_struct *get_swap_info_struct(unsigned type)
+{
+ return swap_info[type];
+}
+
/*
* add_swap_count_continuation - called when a swap count is duplicated
* beyond SWAP_MAP_MAX, it allocates a new page and links that to the entry's
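
Taken together, the swapfile.c changes give hibernation code a way to claim a contiguous run of swap slots (get_swap_range_of_type()), translate entries into device page offsets (map_swap_entry(), now non-static), and reach a device's swap_info_struct directly (get_swap_info_struct()). A hedged sketch of a consumer, assuming the header side of the series declares these functions; note that map_swap_entry() returns a page offset into the backing device, not a sector:

#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/errno.h>

/* Hypothetical consumer, not TuxOnIce's actual image writer: reserve up
 * to 'limit' contiguous slots on swap device 'type'. */
static int toi_reserve_swap_extent(int type, unsigned int limit)
{
	swp_entry_t start, end;
	struct block_device *bdev;

	get_swap_range_of_type(type, &start, &end, limit);
	if (!swp_offset(start))	/* offset 0 is never handed out */
		return -ENOSPC;

	pr_debug("toi: swap page offsets %llu-%llu reserved\n",
		 (unsigned long long)map_swap_entry(start, &bdev),
		 (unsigned long long)map_swap_entry(end, &bdev));
	return 0;
}
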
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 71b1c2994..f406a4765 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1475,7 +1475,7 @@ static int too_many_isolated(struct zone *zone, int file,
{
unsigned long inactive, isolated;
- if (current_is_kswapd())
+ if (current_is_kswapd() || sc->hibernation_mode)
return 0;
if (!sane_reclaim(sc))
@@ -2345,6 +2345,9 @@ static inline bool should_continue_reclaim(struct zone *zone,
unsigned long pages_for_compaction;
unsigned long inactive_lru_pages;
+ if (nr_reclaimed && nr_scanned && sc->nr_to_reclaim >= sc->nr_reclaimed)
+ return true;
+
/* If not in reclaim/compaction mode, stop */
if (!in_reclaim_compaction(sc))
return false;
@@ -2659,6 +2662,12 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
unsigned long total_scanned = 0;
unsigned long writeback_threshold;
bool zones_reclaimable;
+
+#ifdef CONFIG_FREEZER
+ if (unlikely(pm_freezing && !sc->hibernation_mode))
+ return 0;
+#endif
+
retry:
delayacct_freepages_start();
@@ -3540,6 +3549,11 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
if (!populated_zone(zone))
return;
+#ifdef CONFIG_FREEZER
+ if (pm_freezing)
+ return;
+#endif
+
if (!cpuset_zone_allowed(zone, GFP_KERNEL | __GFP_HARDWALL))
return;
pgdat = zone->zone_pgdat;
@@ -3565,7 +3579,7 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
* LRU order by reclaiming preferentially
* inactive > active > active referenced > active mapped
*/
-unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
+unsigned long shrink_memory_mask(unsigned long nr_to_reclaim, gfp_t mask)
{
struct reclaim_state reclaim_state;
struct scan_control sc = {
@@ -3594,6 +3608,11 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
return nr_reclaimed;
}
+
+unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
+{
+ return shrink_memory_mask(nr_to_reclaim, GFP_HIGHUSER_MOVABLE);
+}
#endif /* CONFIG_HIBERNATION */
/* It's optimal to keep kswapds on the same CPUs as their memory, but
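
The vmscan.c changes let reclaim keep working on hibernation's behalf while ordinary direct reclaim and kswapd wakeups are suppressed during freezing, and they generalise shrink_all_memory() into shrink_memory_mask(), which takes the GFP mask to reclaim against; the old entry point simply forwards GFP_HIGHUSER_MOVABLE. A hedged sketch of a hibernation-side caller, assuming the new function is declared next to shrink_all_memory() in linux/swap.h (the helper and mask choice are illustrative):

#include <linux/swap.h>
#include <linux/gfp.h>

/* Hypothetical: free some memory before building the image, reclaiming
 * only from zones that GFP_KERNEL allocations can use so the freed
 * pages are suitable for lowmem image metadata. */
static unsigned long toi_free_lowmem(unsigned long pages_wanted)
{
	return shrink_memory_mask(pages_wanted, GFP_KERNEL);
}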