Diffstat (limited to 'mm/compaction.c')
-rw-r--r-- | mm/compaction.c | 234
1 file changed, 124 insertions, 110 deletions
diff --git a/mm/compaction.c b/mm/compaction.c
index 7bc04778f..9affb2908 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -15,11 +15,11 @@
 #include <linux/backing-dev.h>
 #include <linux/sysctl.h>
 #include <linux/sysfs.h>
-#include <linux/balloon_compaction.h>
 #include <linux/page-isolation.h>
 #include <linux/kasan.h>
 #include <linux/kthread.h>
 #include <linux/freezer.h>
+#include <linux/page_owner.h>
 #include "internal.h"
 
 #ifdef CONFIG_COMPACTION
@@ -65,13 +65,27 @@ static unsigned long release_freepages(struct list_head *freelist)
 
 static void map_pages(struct list_head *list)
 {
-	struct page *page;
+	unsigned int i, order, nr_pages;
+	struct page *page, *next;
+	LIST_HEAD(tmp_list);
+
+	list_for_each_entry_safe(page, next, list, lru) {
+		list_del(&page->lru);
+
+		order = page_private(page);
+		nr_pages = 1 << order;
 
-	list_for_each_entry(page, list, lru) {
-		arch_alloc_page(page, 0);
-		kernel_map_pages(page, 1, 1);
-		kasan_alloc_pages(page, 0);
+		post_alloc_hook(page, order, __GFP_MOVABLE);
+		if (order)
+			split_page(page, order);
+
+		for (i = 0; i < nr_pages; i++) {
+			list_add(&page->lru, &tmp_list);
+			page++;
+		}
 	}
+
+	list_splice(&tmp_list, list);
 }
 
 static inline bool migrate_async_suitable(int migratetype)
@@ -81,6 +95,44 @@ static inline bool migrate_async_suitable(int migratetype)
 
 #ifdef CONFIG_COMPACTION
 
+int PageMovable(struct page *page)
+{
+	struct address_space *mapping;
+
+	VM_BUG_ON_PAGE(!PageLocked(page), page);
+	if (!__PageMovable(page))
+		return 0;
+
+	mapping = page_mapping(page);
+	if (mapping && mapping->a_ops && mapping->a_ops->isolate_page)
+		return 1;
+
+	return 0;
+}
+EXPORT_SYMBOL(PageMovable);
+
+void __SetPageMovable(struct page *page, struct address_space *mapping)
+{
+	VM_BUG_ON_PAGE(!PageLocked(page), page);
+	VM_BUG_ON_PAGE((unsigned long)mapping & PAGE_MAPPING_MOVABLE, page);
+	page->mapping = (void *)((unsigned long)mapping | PAGE_MAPPING_MOVABLE);
+}
+EXPORT_SYMBOL(__SetPageMovable);
+
+void __ClearPageMovable(struct page *page)
+{
+	VM_BUG_ON_PAGE(!PageLocked(page), page);
+	VM_BUG_ON_PAGE(!PageMovable(page), page);
+	/*
+	 * Clear registered address_space val with keeping PAGE_MAPPING_MOVABLE
+	 * flag so that VM can catch up released page by driver after isolation.
+	 * With it, VM migration doesn't try to put it back.
+	 */
+	page->mapping = (void *)((unsigned long)page->mapping &
+				PAGE_MAPPING_MOVABLE);
+}
+EXPORT_SYMBOL(__ClearPageMovable);
+
 /* Do not skip compaction more than 64 times */
 #define COMPACT_MAX_DEFER_SHIFT 6
 
@@ -279,7 +331,7 @@ static bool compact_trylock_irqsave(spinlock_t *lock, unsigned long *flags,
 {
 	if (cc->mode == MIGRATE_ASYNC) {
 		if (!spin_trylock_irqsave(lock, *flags)) {
-			cc->contended = COMPACT_CONTENDED_LOCK;
+			cc->contended = true;
 			return false;
 		}
 	} else {
@@ -313,13 +365,13 @@ static bool compact_unlock_should_abort(spinlock_t *lock,
 	}
 
 	if (fatal_signal_pending(current)) {
-		cc->contended = COMPACT_CONTENDED_SCHED;
+		cc->contended = true;
 		return true;
 	}
 
 	if (need_resched()) {
 		if (cc->mode == MIGRATE_ASYNC) {
-			cc->contended = COMPACT_CONTENDED_SCHED;
+			cc->contended = true;
 			return true;
 		}
 		cond_resched();
@@ -342,7 +394,7 @@ static inline bool compact_should_abort(struct compact_control *cc)
 	/* async compaction aborts if contended */
 	if (need_resched()) {
 		if (cc->mode == MIGRATE_ASYNC) {
-			cc->contended = COMPACT_CONTENDED_SCHED;
+			cc->contended = true;
 			return true;
 		}
 
@@ -368,12 +420,13 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
 	unsigned long flags = 0;
 	bool locked = false;
 	unsigned long blockpfn = *start_pfn;
+	unsigned int order;
 
 	cursor = pfn_to_page(blockpfn);
 
 	/* Isolate free pages. */
 	for (; blockpfn < end_pfn; blockpfn++, cursor++) {
-		int isolated, i;
+		int isolated;
 		struct page *page = cursor;
 
 		/*
@@ -439,17 +492,17 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
 			goto isolate_fail;
 		}
 
-		/* Found a free page, break it into order-0 pages */
-		isolated = split_free_page(page);
+		/* Found a free page, will break it into order-0 pages */
+		order = page_order(page);
+		isolated = __isolate_free_page(page, order);
 		if (!isolated)
 			break;
+		set_page_private(page, order);
 
 		total_isolated += isolated;
 		cc->nr_freepages += isolated;
-		for (i = 0; i < isolated; i++) {
-			list_add(&page->lru, freelist);
-			page++;
-		}
+		list_add_tail(&page->lru, freelist);
+
 		if (!strict && cc->nr_migratepages <= cc->nr_freepages) {
 			blockpfn += isolated;
 			break;
@@ -568,7 +621,7 @@ isolate_freepages_range(struct compact_control *cc,
 		 */
 	}
 
-	/* split_free_page does not map the pages */
+	/* __isolate_free_page() does not map the pages */
 	map_pages(&freelist);
 
 	if (pfn < end_pfn) {
@@ -593,8 +646,8 @@ static void acct_isolated(struct zone *zone, struct compact_control *cc)
 	list_for_each_entry(page, &cc->migratepages, lru)
 		count[!!page_is_file_cache(page)]++;
 
-	mod_zone_page_state(zone, NR_ISOLATED_ANON, count[0]);
-	mod_zone_page_state(zone, NR_ISOLATED_FILE, count[1]);
+	mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_ANON, count[0]);
+	mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE, count[1]);
 }
 
 /* Similar to reclaim, but different enough that they don't share logic */
@@ -602,12 +655,12 @@ static bool too_many_isolated(struct zone *zone)
 {
 	unsigned long active, inactive, isolated;
 
-	inactive = zone_page_state(zone, NR_INACTIVE_FILE) +
-			zone_page_state(zone, NR_INACTIVE_ANON);
-	active = zone_page_state(zone, NR_ACTIVE_FILE) +
-			zone_page_state(zone, NR_ACTIVE_ANON);
-	isolated = zone_page_state(zone, NR_ISOLATED_FILE) +
-			zone_page_state(zone, NR_ISOLATED_ANON);
+	inactive = node_page_state(zone->zone_pgdat, NR_INACTIVE_FILE) +
+			node_page_state(zone->zone_pgdat, NR_INACTIVE_ANON);
+	active = node_page_state(zone->zone_pgdat, NR_ACTIVE_FILE) +
+			node_page_state(zone->zone_pgdat, NR_ACTIVE_ANON);
+	isolated = node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE) +
+			node_page_state(zone->zone_pgdat, NR_ISOLATED_ANON);
 
 	return isolated > (inactive + active) / 2;
 }
@@ -670,7 +723,6 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 
 	/* Time to isolate some pages for migration */
 	for (; low_pfn < end_pfn; low_pfn++) {
-		bool is_lru;
 
 		if (skip_on_failure && low_pfn >= next_skip_pfn) {
 			/*
@@ -700,7 +752,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 		 * if contended.
 		 */
 		if (!(low_pfn % SWAP_CLUSTER_MAX)
-		    && compact_unlock_should_abort(&zone->lru_lock, flags,
+		    && compact_unlock_should_abort(zone_lru_lock(zone), flags,
 								&locked, cc))
 			break;
 
@@ -733,21 +785,6 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 		}
 
 		/*
-		 * Check may be lockless but that's ok as we recheck later.
-		 * It's possible to migrate LRU pages and balloon pages
-		 * Skip any other type of page
-		 */
-		is_lru = PageLRU(page);
-		if (!is_lru) {
-			if (unlikely(balloon_page_movable(page))) {
-				if (balloon_page_isolate(page)) {
-					/* Successfully isolated */
-					goto isolate_success;
-				}
-			}
-		}
-
-		/*
 		 * Regardless of being on LRU, compound pages such as THP and
 		 * hugetlbfs are not to be compacted. We can potentially save
 		 * a lot of iterations if we skip them at once. The check is
@@ -763,8 +800,30 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 			goto isolate_fail;
 		}
 
-		if (!is_lru)
+		/*
+		 * Check may be lockless but that's ok as we recheck later.
+		 * It's possible to migrate LRU and non-lru movable pages.
+		 * Skip any other type of page
+		 */
+		if (!PageLRU(page)) {
+			/*
+			 * __PageMovable can return false positive so we need
+			 * to verify it under page_lock.
+			 */
+			if (unlikely(__PageMovable(page)) &&
+					!PageIsolated(page)) {
+				if (locked) {
+					spin_unlock_irqrestore(zone_lru_lock(zone),
+									flags);
+					locked = false;
+				}
+
+				if (isolate_movable_page(page, isolate_mode))
+					goto isolate_success;
+			}
+
 			goto isolate_fail;
+		}
 
 		/*
 		 * Migration will fail if an anonymous page is pinned in memory,
@@ -777,7 +836,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 
 		/* If we already hold the lock, we can skip some rechecking */
 		if (!locked) {
-			locked = compact_trylock_irqsave(&zone->lru_lock,
+			locked = compact_trylock_irqsave(zone_lru_lock(zone),
 								&flags, cc);
 			if (!locked)
 				break;
@@ -797,7 +856,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 			}
 		}
 
-		lruvec = mem_cgroup_page_lruvec(page, zone);
+		lruvec = mem_cgroup_page_lruvec(page, zone->zone_pgdat);
 
 		/* Try isolate the page */
 		if (__isolate_lru_page(page, isolate_mode) != 0)
@@ -840,7 +899,7 @@ isolate_fail:
 		 */
 		if (nr_isolated) {
 			if (locked) {
-				spin_unlock_irqrestore(&zone->lru_lock, flags);
+				spin_unlock_irqrestore(zone_lru_lock(zone), flags);
 				locked = false;
 			}
 			acct_isolated(zone, cc);
@@ -868,7 +927,7 @@ isolate_fail:
 		low_pfn = end_pfn;
 
 	if (locked)
-		spin_unlock_irqrestore(&zone->lru_lock, flags);
+		spin_unlock_irqrestore(zone_lru_lock(zone), flags);
 
 	/*
 	 * Update the pageblock-skip information and cached scanner pfn,
@@ -1059,7 +1118,7 @@ static void isolate_freepages(struct compact_control *cc)
 		}
 	}
 
-	/* split_free_page does not map the pages */
+	/* __isolate_free_page() does not map the pages */
 	map_pages(freelist);
 
 	/*
@@ -1141,7 +1200,7 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
 	struct page *page;
 	const isolate_mode_t isolate_mode =
 		(sysctl_compact_unevictable_allowed ? ISOLATE_UNEVICTABLE : 0) |
-		(cc->mode == MIGRATE_ASYNC ? ISOLATE_ASYNC_MIGRATE : 0);
+		(cc->mode != MIGRATE_SYNC ? ISOLATE_ASYNC_MIGRATE : 0);
 
 	/*
 	 * Start at where we last stopped, or beginning of the zone as
@@ -1560,14 +1619,11 @@ out:
 	trace_mm_compaction_end(start_pfn, cc->migrate_pfn,
 				cc->free_pfn, end_pfn, sync, ret);
 
-	if (ret == COMPACT_CONTENDED)
-		ret = COMPACT_PARTIAL;
-
 	return ret;
 }
 
 static enum compact_result compact_zone_order(struct zone *zone, int order,
-		gfp_t gfp_mask, enum migrate_mode mode, int *contended,
+		gfp_t gfp_mask, enum compact_priority prio,
 		unsigned int alloc_flags, int classzone_idx)
 {
 	enum compact_result ret;
@@ -1577,7 +1633,8 @@ static enum compact_result compact_zone_order(struct zone *zone, int order,
 		.order = order,
 		.gfp_mask = gfp_mask,
 		.zone = zone,
-		.mode = mode,
+		.mode = (prio == COMPACT_PRIO_ASYNC) ?
+					MIGRATE_ASYNC : MIGRATE_SYNC_LIGHT,
 		.alloc_flags = alloc_flags,
 		.classzone_idx = classzone_idx,
 		.direct_compaction = true,
@@ -1590,7 +1647,6 @@ static enum compact_result compact_zone_order(struct zone *zone, int order,
 	VM_BUG_ON(!list_empty(&cc.freepages));
 	VM_BUG_ON(!list_empty(&cc.migratepages));
 
-	*contended = cc.contended;
 	return ret;
 }
 
@@ -1603,50 +1659,38 @@ int sysctl_extfrag_threshold = 500;
 * @alloc_flags: The allocation flags of the current allocation
 * @ac: The context of current allocation
 * @mode: The migration mode for async, sync light, or sync migration
- * @contended: Return value that determines if compaction was aborted due to
- *	       need_resched() or lock contention
 *
 * This is the main entry point for direct page compaction.
 */
 enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
 		unsigned int alloc_flags, const struct alloc_context *ac,
-		enum migrate_mode mode, int *contended)
+		enum compact_priority prio)
 {
 	int may_enter_fs = gfp_mask & __GFP_FS;
 	int may_perform_io = gfp_mask & __GFP_IO;
 	struct zoneref *z;
 	struct zone *zone;
 	enum compact_result rc = COMPACT_SKIPPED;
-	int all_zones_contended = COMPACT_CONTENDED_LOCK; /* init for &= op */
-
-	*contended = COMPACT_CONTENDED_NONE;
 
 	/* Check if the GFP flags allow compaction */
-	if (!order || !may_enter_fs || !may_perform_io)
+	if (!may_enter_fs || !may_perform_io)
 		return COMPACT_SKIPPED;
 
-	trace_mm_compaction_try_to_compact_pages(order, gfp_mask, mode);
+	trace_mm_compaction_try_to_compact_pages(order, gfp_mask, prio);
 
 	/* Compact each zone in the list */
 	for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx,
								ac->nodemask) {
 		enum compact_result status;
-		int zone_contended;
 
 		if (compaction_deferred(zone, order)) {
 			rc = max_t(enum compact_result, COMPACT_DEFERRED, rc);
 			continue;
 		}
 
-		status = compact_zone_order(zone, order, gfp_mask, mode,
-				&zone_contended, alloc_flags,
-				ac_classzone_idx(ac));
+		status = compact_zone_order(zone, order, gfp_mask, prio,
+					alloc_flags, ac_classzone_idx(ac));
 		rc = max(status, rc);
-		/*
-		 * It takes at least one zone that wasn't lock contended
-		 * to clear all_zones_contended.
-		 */
-		all_zones_contended &= zone_contended;
 
 		/* If a normal allocation would succeed, stop compacting */
 		if (zone_watermark_ok(zone, order, low_wmark_pages(zone),
@@ -1658,59 +1702,29 @@ enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
 			 * succeeds in this zone.
 			 */
 			compaction_defer_reset(zone, order, false);
-			/*
-			 * It is possible that async compaction aborted due to
-			 * need_resched() and the watermarks were ok thanks to
-			 * somebody else freeing memory. The allocation can
-			 * however still fail so we better signal the
-			 * need_resched() contention anyway (this will not
-			 * prevent the allocation attempt).
-			 */
-			if (zone_contended == COMPACT_CONTENDED_SCHED)
-				*contended = COMPACT_CONTENDED_SCHED;
 
-			goto break_loop;
+			break;
 		}
 
-		if (mode != MIGRATE_ASYNC && (status == COMPACT_COMPLETE ||
-					status == COMPACT_PARTIAL_SKIPPED)) {
+		if (prio != COMPACT_PRIO_ASYNC && (status == COMPACT_COMPLETE ||
+					status == COMPACT_PARTIAL_SKIPPED))
 			/*
 			 * We think that allocation won't succeed in this zone
 			 * so we defer compaction there. If it ends up
 			 * succeeding after all, it will be reset.
 			 */
 			defer_compaction(zone, order);
-		}
 
 		/*
 		 * We might have stopped compacting due to need_resched() in
 		 * async compaction, or due to a fatal signal detected. In that
-		 * case do not try further zones and signal need_resched()
-		 * contention.
+		 * case do not try further zones
 		 */
-		if ((zone_contended == COMPACT_CONTENDED_SCHED)
-					|| fatal_signal_pending(current)) {
-			*contended = COMPACT_CONTENDED_SCHED;
-			goto break_loop;
-		}
-
-		continue;
-break_loop:
-		/*
-		 * We might not have tried all the zones, so be conservative
-		 * and assume they are not all lock contended.
-		 */
-		all_zones_contended = 0;
-		break;
+		if ((prio == COMPACT_PRIO_ASYNC && need_resched())
+					|| fatal_signal_pending(current))
			break;
 	}
 
-	/*
-	 * If at least one zone wasn't deferred or skipped, we report if all
-	 * zones that were tried were lock contended.
-	 */
-	if (rc > COMPACT_INACTIVE && all_zones_contended)
-		*contended = COMPACT_CONTENDED_LOCK;
-
 	return rc;
 }
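
The PageMovable()/__SetPageMovable()/__ClearPageMovable() entry points added above are exported so that drivers can opt their pages into compaction. Below is a minimal editorial sketch of the driver side, not part of this commit: the demo_* names and the mapping setup are hypothetical; the .isolate_page callback is the one this diff probes via mapping->a_ops->isolate_page, and the .migratepage/.putback_page hooks are assumed from the companion mm/migrate.c and include/linux/fs.h changes in the same series, which are not shown in this file's diff.

#include <linux/fs.h>
#include <linux/migrate.h>
#include <linux/mm.h>
#include <linux/pagemap.h>

/* All demo_* identifiers are hypothetical placeholders. */

static bool demo_isolate_page(struct page *page, isolate_mode_t mode)
{
	/*
	 * Called with the page locked. Detach the page from the
	 * driver's internal lists and return true if it can be
	 * migrated right now.
	 */
	return true;
}

static int demo_migratepage(struct address_space *mapping,
		struct page *newpage, struct page *page,
		enum migrate_mode mode)
{
	/* Copy contents and private state, then retire the old page. */
	return MIGRATEPAGE_SUCCESS;
}

static void demo_putback_page(struct page *page)
{
	/* Migration was aborted; put the page back on driver lists. */
}

static const struct address_space_operations demo_aops = {
	.isolate_page	= demo_isolate_page,
	.migratepage	= demo_migratepage,
	.putback_page	= demo_putback_page,
};

/*
 * After allocating a page it owns, the driver marks it movable under
 * the page lock; mapping->a_ops is assumed to point at demo_aops.
 */
static void demo_make_movable(struct page *page, struct address_space *mapping)
{
	lock_page(page);
	__SetPageMovable(page, mapping);
	unlock_page(page);
}

With this in place, the new branch in isolate_migratepages_block() finds !PageLRU() pages whose mapping carries PAGE_MAPPING_MOVABLE and hands them to isolate_movable_page(), which locks the page and calls the driver's .isolate_page callback.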
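On why __PageMovable() "can return false positive", per the comment in that new branch: movability is encoded as a tag in the low bits of page->mapping, next to the existing anon-page bit, and the lockless scanner checks only that tag. The encoding comes from the companion include/linux/page-flags.h change in this series; the sketch below reproduces it for reference and is not part of this file's diff.

#define PAGE_MAPPING_ANON	0x1
#define PAGE_MAPPING_MOVABLE	0x2
#define PAGE_MAPPING_FLAGS	(PAGE_MAPPING_ANON | PAGE_MAPPING_MOVABLE)

/* Lockless hint only; may race with the driver releasing the page. */
static __always_inline int __PageMovable(struct page *page)
{
	return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) ==
				PAGE_MAPPING_MOVABLE;
}

This is also why __ClearPageMovable() above masks page->mapping down to PAGE_MAPPING_MOVABLE rather than zeroing it: the driver has released the page, but the surviving tag tells migration not to put the page back, while the authoritative test, PageMovable(), re-checks mapping->a_ops->isolate_page under the page lock.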