7 files changed, 138 insertions, 20 deletions
diff --git a/mm/debug.c b/mm/debug.c
index f05b2d5d6..5c6da0ffd 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -40,6 +40,12 @@ static const struct trace_print_flags pageflag_names[] = {
 #ifdef CONFIG_MEMORY_FAILURE
 	{1UL << PG_hwpoison,		"hwpoison"	},
 #endif
+#ifdef CONFIG_TOI_INCREMENTAL
+	{1UL << PG_toi_untracked,	"toi_untracked"	},
+	{1UL << PG_toi_ro,		"toi_ro"	},
+	{1UL << PG_toi_cbw,		"toi_cbw"	},
+	{1UL << PG_toi_dirty,		"toi_dirty"	},
+#endif
 #if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT)
 	{1UL << PG_young,		"young"		},
 	{1UL << PG_idle,		"idle"		},
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 838ca8bb6..58bc48250 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -62,6 +62,7 @@
 #include <linux/hugetlb.h>
 #include <linux/sched/rt.h>
 #include <linux/page_owner.h>
+#include <linux/tuxonice.h>
 #include <linux/kthread.h>
 
 #include <asm/sections.h>
@@ -751,6 +752,12 @@ static inline int free_pages_check(struct page *page)
 	if (unlikely(page->mem_cgroup))
 		bad_reason = "page still charged to cgroup";
 #endif
+        if (unlikely(PageTOI_Untracked(page))) {
+            // Make it writable and included in image if allocated.
+            ClearPageTOI_Untracked(page);
+            // If it gets allocated, it will be dirty from TOI's POV.
+            SetPageTOI_Dirty(page);
+        }
 	if (unlikely(bad_reason)) {
 		bad_page(page, bad_reason, bad_flags);
 		return 1;
@@ -1390,6 +1397,11 @@ static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
 		struct page *p = page + i;
 		if (unlikely(check_new_page(p)))
 			return 1;
+                if (unlikely(toi_incremental_support() && gfp_flags & ___GFP_TOI_NOTRACK)) {
+                    // Make the page writable if it's protected, and set it to be untracked.
+                    SetPageTOI_Untracked(p);
+                    toi_make_writable(init_mm.pgd, (unsigned long) page_address(p));
+                }
 	}
 
 	set_page_private(page, 0);
diff --git a/mm/percpu.c b/mm/percpu.c
index 998607adf..2f040d0b8 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -125,6 +125,7 @@ static int pcpu_nr_units __read_mostly;
 static int pcpu_atom_size __read_mostly;
 static int pcpu_nr_slots __read_mostly;
 static size_t pcpu_chunk_struct_size __read_mostly;
+static int pcpu_pfns;
 
 /* cpus with the lowest and highest unit addresses */
 static unsigned int pcpu_low_unit_cpu __read_mostly;
@@ -1790,6 +1791,7 @@ static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
 	/* calculate size_sum and ensure dyn_size is enough for early alloc */
 	size_sum = PFN_ALIGN(static_size + reserved_size +
 			    max_t(size_t, dyn_size, PERCPU_DYNAMIC_EARLY_SIZE));
+        pcpu_pfns = PFN_DOWN(size_sum);
 	dyn_size = size_sum - static_size - reserved_size;
 
 	/*
@@ -2277,6 +2279,22 @@ void __init percpu_init_late(void)
 	}
 }
 
+#ifdef CONFIG_TOI_INCREMENTAL
+/*
+ * It doesn't matter if we mark an extra page as untracked (and therefore
+ * always save it in incremental images).
+ */
+void toi_mark_per_cpus_pages_untracked(void)
+{
+    int i;
+
+    struct page *page = virt_to_page(pcpu_base_addr);
+
+    for (i = 0; i < pcpu_pfns; i++)
+        SetPageTOI_Untracked(page + i);
+}
+#endif
+
 /*
  * Percpu allocator is initialized early during boot when neither slab or
  * workqueue is available.  Plug async management until everything is up
diff --git a/mm/shmem.c b/mm/shmem.c
index 440e2a7e6..f64ab5f8c 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1500,7 +1500,7 @@ static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
 }
 
 static struct inode *shmem_get_inode(struct super_block *sb, const struct inode *dir,
-				     umode_t mode, dev_t dev, unsigned long flags)
+				     umode_t mode, dev_t dev, unsigned long flags, int atomic_copy)
 {
 	struct inode *inode;
 	struct shmem_inode_info *info;
@@ -1521,6 +1521,8 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
 		spin_lock_init(&info->lock);
 		info->seals = F_SEAL_SEAL;
 		info->flags = flags & VM_NORESERVE;
+		if (atomic_copy)
+			inode->i_flags |= S_ATOMIC_COPY;
 		INIT_LIST_HEAD(&info->swaplist);
 		simple_xattrs_init(&info->xattrs);
 		cache_no_acl(inode);
@@ -2310,7 +2312,7 @@ shmem_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
 	struct inode *inode;
 	int error = -ENOSPC;
 
-	inode = shmem_get_inode(dir->i_sb, dir, mode, dev, VM_NORESERVE);
+	inode = shmem_get_inode(dir->i_sb, dir, mode, dev, VM_NORESERVE, 0);
 	if (inode) {
 		error = simple_acl_create(dir, inode);
 		if (error)
@@ -2339,7 +2341,7 @@ shmem_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
 	struct inode *inode;
 	int error = -ENOSPC;
 
-	inode = shmem_get_inode(dir->i_sb, dir, mode, 0, VM_NORESERVE);
+	inode = shmem_get_inode(dir->i_sb, dir, mode, 0, VM_NORESERVE, 0);
 	if (inode) {
 		error = security_inode_init_security(inode, dir,
 						     NULL,
@@ -2531,7 +2533,7 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
 	if (len > PAGE_CACHE_SIZE)
 		return -ENAMETOOLONG;
 
-	inode = shmem_get_inode(dir->i_sb, dir, S_IFLNK|S_IRWXUGO, 0, VM_NORESERVE);
+	inode = shmem_get_inode(dir->i_sb, dir, S_IFLNK|S_IRWXUGO, 0, VM_NORESERVE, 0);
 	if (!inode)
 		return -ENOSPC;
 
@@ -3010,7 +3012,7 @@ SYSCALL_DEFINE2(memfd_create,
 		goto err_name;
 	}
 
-	file = shmem_file_setup(name, 0, VM_NORESERVE);
+	file = shmem_file_setup(name, 0, VM_NORESERVE, 0);
 	if (IS_ERR(file)) {
 		error = PTR_ERR(file);
 		goto err_fd;
@@ -3101,7 +3103,7 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_flags |= MS_POSIXACL;
 #endif
 
-	inode = shmem_get_inode(sb, NULL, S_IFDIR | sbinfo->mode, 0, VM_NORESERVE);
+	inode = shmem_get_inode(sb, NULL, S_IFDIR | sbinfo->mode, 0, VM_NORESERVE, 0);
 	if (!inode)
 		goto failed;
 	inode->i_uid = sbinfo->uid;
@@ -3357,7 +3359,7 @@ EXPORT_SYMBOL_GPL(shmem_truncate_range);
 
 #define shmem_vm_ops				generic_file_vm_ops
 #define shmem_file_operations			ramfs_file_operations
-#define shmem_get_inode(sb, dir, mode, dev, flags)	ramfs_get_inode(sb, dir, mode, dev)
+#define shmem_get_inode(sb, dir, mode, dev, flags, atomic_copy)	ramfs_get_inode(sb, dir, mode, dev)
 #define shmem_acct_size(flags, size)		0
 #define shmem_unacct_size(flags, size)		do {} while (0)
 
@@ -3370,7 +3372,8 @@ static struct dentry_operations anon_ops = {
 };
 
 static struct file *__shmem_file_setup(const char *name, loff_t size,
-				       unsigned long flags, unsigned int i_flags)
+				       unsigned long flags, unsigned int i_flags,
+				       int atomic_copy)
 {
 	struct file *res;
 	struct inode *inode;
@@ -3399,7 +3402,7 @@ static struct file *__shmem_file_setup(const char *name, loff_t size,
 	d_set_d_op(path.dentry, &anon_ops);
 
 	res = ERR_PTR(-ENOSPC);
-	inode = shmem_get_inode(sb, NULL, S_IFREG | S_IRWXUGO, 0, flags);
+	inode = shmem_get_inode(sb, NULL, S_IFREG | S_IRWXUGO, 0, flags, atomic_copy);
 	if (!inode)
 		goto put_memory;
 
@@ -3435,9 +3438,9 @@ put_path:
  * @size: size to be set for the file
  * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
  */
-struct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned long flags)
+struct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned long flags, int atomic_copy)
 {
-	return __shmem_file_setup(name, size, flags, S_PRIVATE);
+	return __shmem_file_setup(name, size, flags, S_PRIVATE, atomic_copy);
 }
 
 /**
@@ -3446,9 +3449,9 @@ struct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned lon
  * @size: size to be set for the file
  * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
  */
-struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags)
+struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags, int atomic_copy)
 {
-	return __shmem_file_setup(name, size, flags, 0);
+	return __shmem_file_setup(name, size, flags, 0, atomic_copy);
 }
 EXPORT_SYMBOL_GPL(shmem_file_setup);
 
@@ -3467,7 +3470,7 @@ int shmem_zero_setup(struct vm_area_struct *vma)
 	 * accessible to the user through its mapping, use S_PRIVATE flag to
 	 * bypass file security, in the same way as shmem_kernel_file_setup().
 	 */
-	file = __shmem_file_setup("dev/zero", size, vma->vm_flags, S_PRIVATE);
+	file = __shmem_file_setup("dev/zero", size, vma->vm_flags, S_PRIVATE, 0);
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
diff --git a/mm/slub.c b/mm/slub.c
index d8fbd4a6e..5328e64b9 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1379,7 +1379,7 @@ static inline struct page *alloc_slab_page(struct kmem_cache *s,
 	struct page *page;
 	int order = oo_order(oo);
 
-	flags |= __GFP_NOTRACK;
+	flags |= (__GFP_NOTRACK | ___GFP_TOI_NOTRACK);
 
 	if (node == NUMA_NO_NODE)
 		page = alloc_pages(flags, order);
@@ -3543,7 +3543,7 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
 	struct page *page;
 	void *ptr = NULL;
 
-	flags |= __GFP_COMP | __GFP_NOTRACK;
+	flags |= __GFP_COMP | __GFP_NOTRACK | __GFP_TOI_NOTRACK;
 	page = alloc_kmem_pages_node(node, flags, get_order(size));
 	if (page)
 		ptr = page_address(page);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index d2c37365e..98d348347 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -9,6 +9,7 @@
 #include <linux/hugetlb.h>
 #include <linux/mman.h>
 #include <linux/slab.h>
+#include <linux/export.h>
 #include <linux/kernel_stat.h>
 #include <linux/swap.h>
 #include <linux/vmalloc.h>
@@ -43,7 +44,6 @@
 static bool swap_count_continued(struct swap_info_struct *, pgoff_t,
 				 unsigned char);
 static void free_swap_count_continuations(struct swap_info_struct *);
-static sector_t map_swap_entry(swp_entry_t, struct block_device**);
 
 DEFINE_SPINLOCK(swap_lock);
 static unsigned int nr_swapfiles;
@@ -719,6 +719,60 @@ swp_entry_t get_swap_page_of_type(int type)
 	return (swp_entry_t) {0};
 }
 
+static unsigned int find_next_to_unuse(struct swap_info_struct *si,
+					unsigned int prev, bool frontswap);
+
+void get_swap_range_of_type(int type, swp_entry_t *start, swp_entry_t *end,
+		unsigned int limit)
+{
+	struct swap_info_struct *si;
+	pgoff_t start_at;
+	unsigned int i;
+
+	*start = swp_entry(0, 0);
+	*end = swp_entry(0, 0);
+	si = swap_info[type];
+	spin_lock(&si->lock);
+	if (si && (si->flags & SWP_WRITEOK)) {
+		atomic_long_dec(&nr_swap_pages);
+		/* This is called for allocating swap entry, not cache */
+		start_at = scan_swap_map(si, 1);
+		if (start_at) {
+			unsigned long stop_at = find_next_to_unuse(si, start_at, 0);
+			if (stop_at > start_at)
+				stop_at--;
+			else
+				stop_at = si->max - 1;
+			if (stop_at - start_at + 1 > limit)
+				stop_at = min_t(unsigned int,
+						start_at + limit - 1,
+						si->max - 1);
+			/* Mark them used */
+			for (i = start_at; i <= stop_at; i++)
+				si->swap_map[i] = 1;
+			/* first page already done above */
+			si->inuse_pages += stop_at - start_at;
+
+			atomic_long_sub(stop_at - start_at, &nr_swap_pages);
+			if (start_at == si->lowest_bit)
+				si->lowest_bit = stop_at + 1;
+			if (stop_at == si->highest_bit)
+				si->highest_bit = start_at - 1;
+			if (si->inuse_pages == si->pages) {
+				si->lowest_bit = si->max;
+				si->highest_bit = 0;
+			}
+			for (i = start_at + 1; i <= stop_at; i++)
+				inc_cluster_info_page(si, si->cluster_info, i);
+			si->cluster_next = stop_at + 1;
+			*start = swp_entry(type, start_at);
+			*end = swp_entry(type, stop_at);
+		} else
+			atomic_long_inc(&nr_swap_pages);
+	}
+	spin_unlock(&si->lock);
+}
+
 static struct swap_info_struct *swap_info_get(swp_entry_t entry)
 {
 	struct swap_info_struct *p;
@@ -1607,7 +1661,7 @@ static void drain_mmlist(void)
  * Note that the type of this function is sector_t, but it returns page offset
  * into the bdev, not sector offset.
  */
-static sector_t map_swap_entry(swp_entry_t entry, struct block_device **bdev)
+sector_t map_swap_entry(swp_entry_t entry, struct block_device **bdev)
 {
 	struct swap_info_struct *sis;
 	struct swap_extent *start_se;
@@ -2738,8 +2792,14 @@ pgoff_t __page_file_index(struct page *page)
 	VM_BUG_ON_PAGE(!PageSwapCache(page), page);
 	return swp_offset(swap);
 }
+
 EXPORT_SYMBOL_GPL(__page_file_index);
 
+struct swap_info_struct *get_swap_info_struct(unsigned type)
+{
+	return swap_info[type];
+}
+
 /*
  * add_swap_count_continuation - called when a swap count is duplicated
  * beyond SWAP_MAP_MAX, it allocates a new page and links that to the entry's
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 71b1c2994..f406a4765 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1475,7 +1475,7 @@ static int too_many_isolated(struct zone *zone, int file,
 {
 	unsigned long inactive, isolated;
 
-	if (current_is_kswapd())
+	if (current_is_kswapd() || sc->hibernation_mode)
 		return 0;
 
 	if (!sane_reclaim(sc))
@@ -2345,6 +2345,9 @@ static inline bool should_continue_reclaim(struct zone *zone,
 	unsigned long pages_for_compaction;
 	unsigned long inactive_lru_pages;
 
+	if (nr_reclaimed && nr_scanned && sc->nr_to_reclaim >= sc->nr_reclaimed)
+		return true;
+
 	/* If not in reclaim/compaction mode, stop */
 	if (!in_reclaim_compaction(sc))
 		return false;
@@ -2659,6 +2662,12 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 	unsigned long total_scanned = 0;
 	unsigned long writeback_threshold;
 	bool zones_reclaimable;
+
+#ifdef CONFIG_FREEZER
+	if (unlikely(pm_freezing && !sc->hibernation_mode))
+		return 0;
+#endif
+
 retry:
 	delayacct_freepages_start();
 
@@ -3540,6 +3549,11 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
 	if (!populated_zone(zone))
 		return;
 
+#ifdef CONFIG_FREEZER
+	if (pm_freezing)
+		return;
+#endif
+
 	if (!cpuset_zone_allowed(zone, GFP_KERNEL | __GFP_HARDWALL))
 		return;
 	pgdat = zone->zone_pgdat;
@@ -3565,7 +3579,7 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
  * LRU order by reclaiming preferentially
  * inactive > active > active referenced > active mapped
  */
-unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
+unsigned long shrink_memory_mask(unsigned long nr_to_reclaim, gfp_t mask)
 {
 	struct reclaim_state reclaim_state;
 	struct scan_control sc = {
@@ -3594,6 +3608,11 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
 
 	return nr_reclaimed;
 }
+
+unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
+{
+	return shrink_memory_mask(nr_to_reclaim, GFP_HIGHUSER_MOVABLE);
+}
 #endif /* CONFIG_HIBERNATION */
 
 /* It's optimal to keep kswapds on the same CPUs as their memory, but