diff options
Diffstat (limited to 'mm/memory_hotplug.c')
-rw-r--r-- | mm/memory_hotplug.c | 119 |
1 files changed, 99 insertions, 20 deletions
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index e3cbdcaff..9d29ba0f7 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -449,6 +449,25 @@ out_fail: return -1; } +static struct zone * __meminit move_pfn_range(int zone_shift, + unsigned long start_pfn, unsigned long end_pfn) +{ + struct zone *zone = page_zone(pfn_to_page(start_pfn)); + int ret = 0; + + if (zone_shift < 0) + ret = move_pfn_range_left(zone + zone_shift, zone, + start_pfn, end_pfn); + else if (zone_shift) + ret = move_pfn_range_right(zone, zone + zone_shift, + start_pfn, end_pfn); + + if (ret) + return NULL; + + return zone + zone_shift; +} + static void __meminit grow_pgdat_span(struct pglist_data *pgdat, unsigned long start_pfn, unsigned long end_pfn) { @@ -1028,6 +1047,37 @@ static void node_states_set_node(int node, struct memory_notify *arg) node_set_state(node, N_MEMORY); } +int zone_can_shift(unsigned long pfn, unsigned long nr_pages, + enum zone_type target) +{ + struct zone *zone = page_zone(pfn_to_page(pfn)); + enum zone_type idx = zone_idx(zone); + int i; + + if (idx < target) { + /* pages must be at end of current zone */ + if (pfn + nr_pages != zone_end_pfn(zone)) + return 0; + + /* no zones in use between current zone and target */ + for (i = idx + 1; i < target; i++) + if (zone_is_initialized(zone - idx + i)) + return 0; + } + + if (target < idx) { + /* pages must be at beginning of current zone */ + if (pfn != zone->zone_start_pfn) + return 0; + + /* no zones in use between current zone and target */ + for (i = target + 1; i < idx; i++) + if (zone_is_initialized(zone - idx + i)) + return 0; + } + + return target - idx; +} /* Must be protected by mem_hotplug_begin() */ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_type) @@ -1039,6 +1089,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ int nid; int ret; struct memory_notify arg; + int zone_shift = 0; /* * This doesn't need a lock to do pfn_to_page(). @@ -1052,19 +1103,14 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ !can_online_high_movable(zone)) return -EINVAL; - if (online_type == MMOP_ONLINE_KERNEL && - zone_idx(zone) == ZONE_MOVABLE) { - if (move_pfn_range_left(zone - 1, zone, pfn, pfn + nr_pages)) - return -EINVAL; - } - if (online_type == MMOP_ONLINE_MOVABLE && - zone_idx(zone) == ZONE_MOVABLE - 1) { - if (move_pfn_range_right(zone, zone + 1, pfn, pfn + nr_pages)) - return -EINVAL; - } + if (online_type == MMOP_ONLINE_KERNEL) + zone_shift = zone_can_shift(pfn, nr_pages, ZONE_NORMAL); + else if (online_type == MMOP_ONLINE_MOVABLE) + zone_shift = zone_can_shift(pfn, nr_pages, ZONE_MOVABLE); - /* Previous code may changed the zone of the pfn range */ - zone = page_zone(pfn_to_page(pfn)); + zone = move_pfn_range(zone_shift, pfn, pfn + nr_pages); + if (!zone) + return -EINVAL; arg.start_pfn = pfn; arg.nr_pages = nr_pages; @@ -1163,15 +1209,17 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start) arch_refresh_nodedata(nid, pgdat); } else { - /* Reset the nr_zones and classzone_idx to 0 before reuse */ + /* Reset the nr_zones, order and classzone_idx before reuse */ pgdat->nr_zones = 0; - pgdat->classzone_idx = 0; + pgdat->kswapd_order = 0; + pgdat->kswapd_classzone_idx = 0; } /* we can use NODE_DATA(nid) from here */ /* init node's zones as empty zones, we don't have any present pages.*/ free_area_init_node(nid, zones_size, start_pfn, zholes_size); + pgdat->per_cpu_nodestats = alloc_percpu(struct per_cpu_nodestat); /* * The node we allocated has no zone fallback lists. For avoiding @@ -1202,6 +1250,7 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start) static void rollback_node_hotadd(int nid, pg_data_t *pgdat) { arch_refresh_nodedata(nid, NULL); + free_percpu(pgdat->per_cpu_nodestats); arch_free_nodedata(pgdat); return; } @@ -1501,6 +1550,39 @@ static unsigned long scan_movable_pages(unsigned long start, unsigned long end) return 0; } +static struct page *new_node_page(struct page *page, unsigned long private, + int **result) +{ + gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE; + int nid = page_to_nid(page); + nodemask_t nmask = node_states[N_MEMORY]; + struct page *new_page = NULL; + + /* + * TODO: allocate a destination hugepage from a nearest neighbor node, + * accordance with memory policy of the user process if possible. For + * now as a simple work-around, we use the next node for destination. + */ + if (PageHuge(page)) + return alloc_huge_page_node(page_hstate(compound_head(page)), + next_node_in(nid, nmask)); + + node_clear(nid, nmask); + + if (PageHighMem(page) + || (zone_idx(page_zone(page)) == ZONE_MOVABLE)) + gfp_mask |= __GFP_HIGHMEM; + + if (!nodes_empty(nmask)) + new_page = __alloc_pages_nodemask(gfp_mask, 0, + node_zonelist(nid, gfp_mask), &nmask); + if (!new_page) + new_page = __alloc_pages(gfp_mask, 0, + node_zonelist(nid, gfp_mask)); + + return new_page; +} + #define NR_OFFLINE_AT_ONCE_PAGES (256) static int do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) @@ -1540,7 +1622,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) put_page(page); list_add_tail(&page->lru, &source); move_pages--; - inc_zone_page_state(page, NR_ISOLATED_ANON + + inc_node_page_state(page, NR_ISOLATED_ANON + page_is_file_cache(page)); } else { @@ -1564,11 +1646,8 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) goto out; } - /* - * alloc_migrate_target should be improooooved!! - * migrate_pages returns # of failed pages. - */ - ret = migrate_pages(&source, alloc_migrate_target, NULL, 0, + /* Allocate a new page from the nearest neighbor node */ + ret = migrate_pages(&source, new_node_page, NULL, 0, MIGRATE_SYNC, MR_MEMORY_HOTPLUG); if (ret) putback_movable_pages(&source); |