Diffstat (limited to 'mm/page-writeback.c')
-rw-r--r-- | mm/page-writeback.c | 150
1 file changed, 99 insertions(+), 51 deletions(-)
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 5c3de798f..56be15224 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -275,26 +275,35 @@ static void wb_min_max_ratio(struct bdi_writeback *wb,
  */
 
 /**
- * zone_dirtyable_memory - number of dirtyable pages in a zone
- * @zone: the zone
+ * node_dirtyable_memory - number of dirtyable pages in a node
+ * @pgdat: the node
  *
- * Returns the zone's number of pages potentially available for dirty
- * page cache. This is the base value for the per-zone dirty limits.
+ * Returns the node's number of pages potentially available for dirty
+ * page cache. This is the base value for the per-node dirty limits.
  */
-static unsigned long zone_dirtyable_memory(struct zone *zone)
+static unsigned long node_dirtyable_memory(struct pglist_data *pgdat)
 {
-	unsigned long nr_pages;
+	unsigned long nr_pages = 0;
+	int z;
+
+	for (z = 0; z < MAX_NR_ZONES; z++) {
+		struct zone *zone = pgdat->node_zones + z;
+
+		if (!populated_zone(zone))
+			continue;
+
+		nr_pages += zone_page_state(zone, NR_FREE_PAGES);
+	}
 
-	nr_pages = zone_page_state(zone, NR_FREE_PAGES);
 	/*
 	 * Pages reserved for the kernel should not be considered
 	 * dirtyable, to prevent a situation where reclaim has to
 	 * clean pages in order to balance the zones.
 	 */
-	nr_pages -= min(nr_pages, zone->totalreserve_pages);
+	nr_pages -= min(nr_pages, pgdat->totalreserve_pages);
 
-	nr_pages += zone_page_state(zone, NR_INACTIVE_FILE);
-	nr_pages += zone_page_state(zone, NR_ACTIVE_FILE);
+	nr_pages += node_page_state(pgdat, NR_INACTIVE_FILE);
+	nr_pages += node_page_state(pgdat, NR_ACTIVE_FILE);
 
 	return nr_pages;
 }
@@ -307,13 +316,26 @@ static unsigned long highmem_dirtyable_memory(unsigned long total)
 	int i;
 
 	for_each_node_state(node, N_HIGH_MEMORY) {
-		for (i = 0; i < MAX_NR_ZONES; i++) {
-			struct zone *z = &NODE_DATA(node)->node_zones[i];
+		for (i = ZONE_NORMAL + 1; i < MAX_NR_ZONES; i++) {
+			struct zone *z;
+			unsigned long nr_pages;
 
-			if (is_highmem(z))
-				x += zone_dirtyable_memory(z);
+			if (!is_highmem_idx(i))
+				continue;
+
+			z = &NODE_DATA(node)->node_zones[i];
+			if (!populated_zone(z))
+				continue;
+
+			nr_pages = zone_page_state(z, NR_FREE_PAGES);
+			/* watch for underflows */
+			nr_pages -= min(nr_pages, high_wmark_pages(z));
+			nr_pages += zone_page_state(z, NR_ZONE_INACTIVE_FILE);
+			nr_pages += zone_page_state(z, NR_ZONE_ACTIVE_FILE);
+			x += nr_pages;
 		}
 	}
+
 	/*
 	 * Unreclaimable memory (kernel memory or anonymous memory
 	 * without swap) can bring down the dirtyable pages below
@@ -356,8 +378,8 @@ static unsigned long global_dirtyable_memory(void)
 	 */
 	x -= min(x, totalreserve_pages);
 
-	x += global_page_state(NR_INACTIVE_FILE);
-	x += global_page_state(NR_ACTIVE_FILE);
+	x += global_node_page_state(NR_INACTIVE_FILE);
+	x += global_node_page_state(NR_ACTIVE_FILE);
 
 	if (!vm_highmem_is_dirtyable)
 		x -= highmem_dirtyable_memory(x);
@@ -453,23 +475,23 @@ void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty)
 }
 
 /**
- * zone_dirty_limit - maximum number of dirty pages allowed in a zone
- * @zone: the zone
+ * node_dirty_limit - maximum number of dirty pages allowed in a node
+ * @pgdat: the node
  *
- * Returns the maximum number of dirty pages allowed in a zone, based
- * on the zone's dirtyable memory.
+ * Returns the maximum number of dirty pages allowed in a node, based
+ * on the node's dirtyable memory.
  */
-static unsigned long zone_dirty_limit(struct zone *zone)
+static unsigned long node_dirty_limit(struct pglist_data *pgdat)
 {
-	unsigned long zone_memory = zone_dirtyable_memory(zone);
+	unsigned long node_memory = node_dirtyable_memory(pgdat);
 	struct task_struct *tsk = current;
 	unsigned long dirty;
 
 	if (vm_dirty_bytes)
 		dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE) *
-			zone_memory / global_dirtyable_memory();
+			node_memory / global_dirtyable_memory();
 	else
-		dirty = vm_dirty_ratio * zone_memory / 100;
+		dirty = vm_dirty_ratio * node_memory / 100;
 
 	if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk))
 		dirty += dirty / 4;
@@ -478,19 +500,22 @@ static unsigned long zone_dirty_limit(struct zone *zone)
 }
 
 /**
- * zone_dirty_ok - tells whether a zone is within its dirty limits
- * @zone: the zone to check
+ * node_dirty_ok - tells whether a node is within its dirty limits
+ * @pgdat: the node to check
  *
- * Returns %true when the dirty pages in @zone are within the zone's
+ * Returns %true when the dirty pages in @pgdat are within the node's
  * dirty limit, %false if the limit is exceeded.
  */
-bool zone_dirty_ok(struct zone *zone)
+bool node_dirty_ok(struct pglist_data *pgdat)
 {
-	unsigned long limit = zone_dirty_limit(zone);
+	unsigned long limit = node_dirty_limit(pgdat);
+	unsigned long nr_pages = 0;
 
-	return zone_page_state(zone, NR_FILE_DIRTY) +
-	       zone_page_state(zone, NR_UNSTABLE_NFS) +
-	       zone_page_state(zone, NR_WRITEBACK) <= limit;
+	nr_pages += node_page_state(pgdat, NR_FILE_DIRTY);
+	nr_pages += node_page_state(pgdat, NR_UNSTABLE_NFS);
+	nr_pages += node_page_state(pgdat, NR_WRITEBACK);
+
+	return nr_pages <= limit;
 }
 
 int dirty_background_ratio_handler(struct ctl_table *table, int write,
@@ -1578,10 +1603,10 @@ static void balance_dirty_pages(struct address_space *mapping,
 		 * written to the server's write cache, but has not yet
 		 * been flushed to permanent storage.
 		 */
-		nr_reclaimable = global_page_state(NR_FILE_DIRTY) +
-					global_page_state(NR_UNSTABLE_NFS);
+		nr_reclaimable = global_node_page_state(NR_FILE_DIRTY) +
+					global_node_page_state(NR_UNSTABLE_NFS);
 		gdtc->avail = global_dirtyable_memory();
-		gdtc->dirty = nr_reclaimable + global_page_state(NR_WRITEBACK);
+		gdtc->dirty = nr_reclaimable + global_node_page_state(NR_WRITEBACK);
 
 		domain_dirty_limits(gdtc);
 
@@ -1761,9 +1786,8 @@ pause:
 					  pause,
 					  start_time);
 		__set_current_state(TASK_KILLABLE);
-		atomic_inc(&wb->dirty_sleeping);
+		wb->dirty_sleep = now;
 		io_schedule_timeout(pause);
-		atomic_dec(&wb->dirty_sleeping);
 
 		current->dirty_paused_when = now + pause;
 		current->nr_dirtied = 0;
@@ -1920,8 +1944,8 @@ bool wb_over_bg_thresh(struct bdi_writeback *wb)
 	 * as we're trying to decide whether to put more under writeback.
 	 */
 	gdtc->avail = global_dirtyable_memory();
-	gdtc->dirty = global_page_state(NR_FILE_DIRTY) +
-		      global_page_state(NR_UNSTABLE_NFS);
+	gdtc->dirty = global_node_page_state(NR_FILE_DIRTY) +
+		      global_node_page_state(NR_UNSTABLE_NFS);
 	domain_dirty_limits(gdtc);
 
 	if (gdtc->dirty > gdtc->bg_thresh)
@@ -1965,8 +1989,8 @@ void throttle_vm_writeout(gfp_t gfp_mask)
 		 */
 		dirty_thresh += dirty_thresh / 10;	/* wheeee... */
 
-		if (global_page_state(NR_UNSTABLE_NFS) +
-			global_page_state(NR_WRITEBACK) <= dirty_thresh)
+		if (global_node_page_state(NR_UNSTABLE_NFS) +
+			global_node_page_state(NR_WRITEBACK) <= dirty_thresh)
 			break;
 		congestion_wait(BLK_RW_ASYNC, HZ/10);
 
@@ -1994,8 +2018,8 @@ int dirty_writeback_centisecs_handler(struct ctl_table *table, int write,
 void laptop_mode_timer_fn(unsigned long data)
 {
 	struct request_queue *q = (struct request_queue *)data;
-	int nr_pages = global_page_state(NR_FILE_DIRTY) +
-		global_page_state(NR_UNSTABLE_NFS);
+	int nr_pages = global_node_page_state(NR_FILE_DIRTY) +
+		global_node_page_state(NR_UNSTABLE_NFS);
 	struct bdi_writeback *wb;
 
 	/*
@@ -2446,8 +2470,9 @@ void account_page_dirtied(struct page *page, struct address_space *mapping)
 		wb = inode_to_wb(inode);
 
 		mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_DIRTY);
-		__inc_zone_page_state(page, NR_FILE_DIRTY);
-		__inc_zone_page_state(page, NR_DIRTIED);
+		__inc_node_page_state(page, NR_FILE_DIRTY);
+		__inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
+		__inc_node_page_state(page, NR_DIRTIED);
 		__inc_wb_stat(wb, WB_RECLAIMABLE);
 		__inc_wb_stat(wb, WB_DIRTIED);
 		task_io_account_write(PAGE_SIZE);
@@ -2467,7 +2492,8 @@ void account_page_cleaned(struct page *page, struct address_space *mapping,
 {
 	if (mapping_cap_account_dirty(mapping)) {
 		mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_DIRTY);
-		dec_zone_page_state(page, NR_FILE_DIRTY);
+		dec_node_page_state(page, NR_FILE_DIRTY);
+		dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
 		dec_wb_stat(wb, WB_RECLAIMABLE);
 		task_io_account_cancelled_write(PAGE_SIZE);
 	}
@@ -2535,7 +2561,7 @@ void account_page_redirty(struct page *page)
 
 		wb = unlocked_inode_to_wb_begin(inode, &locked);
 		current->nr_dirtied--;
-		dec_zone_page_state(page, NR_DIRTIED);
+		dec_node_page_state(page, NR_DIRTIED);
 		dec_wb_stat(wb, WB_DIRTIED);
 		unlocked_inode_to_wb_end(inode, locked);
 	}
@@ -2573,6 +2599,7 @@ int set_page_dirty(struct page *page)
 {
 	struct address_space *mapping = page_mapping(page);
 
+	page = compound_head(page);
 	if (likely(mapping)) {
 		int (*spd)(struct page *) = mapping->a_ops->set_page_dirty;
 		/*
@@ -2722,7 +2749,8 @@ int clear_page_dirty_for_io(struct page *page)
 		wb = unlocked_inode_to_wb_begin(inode, &locked);
 		if (TestClearPageDirty(page)) {
 			mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_DIRTY);
-			dec_zone_page_state(page, NR_FILE_DIRTY);
+			dec_node_page_state(page, NR_FILE_DIRTY);
+			dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
 			dec_wb_stat(wb, WB_RECLAIMABLE);
 			ret = 1;
 		}
@@ -2757,14 +2785,20 @@ int test_clear_page_writeback(struct page *page)
 				__wb_writeout_inc(wb);
 			}
 		}
+
+		if (mapping->host && !mapping_tagged(mapping,
+						     PAGECACHE_TAG_WRITEBACK))
+			sb_clear_inode_writeback(mapping->host);
+
 		spin_unlock_irqrestore(&mapping->tree_lock, flags);
 	} else {
 		ret = TestClearPageWriteback(page);
 	}
 	if (ret) {
 		mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_WRITEBACK);
-		dec_zone_page_state(page, NR_WRITEBACK);
-		inc_zone_page_state(page, NR_WRITTEN);
+		dec_node_page_state(page, NR_WRITEBACK);
+		dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
+		inc_node_page_state(page, NR_WRITTEN);
 	}
 	unlock_page_memcg(page);
 	return ret;
@@ -2784,11 +2818,24 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
 		spin_lock_irqsave(&mapping->tree_lock, flags);
 		ret = TestSetPageWriteback(page);
 		if (!ret) {
+			bool on_wblist;
+
+			on_wblist = mapping_tagged(mapping,
+						   PAGECACHE_TAG_WRITEBACK);
+
 			radix_tree_tag_set(&mapping->page_tree,
 						page_index(page),
 						PAGECACHE_TAG_WRITEBACK);
 			if (bdi_cap_account_writeback(bdi))
 				__inc_wb_stat(inode_to_wb(inode), WB_WRITEBACK);
+
+			/*
+			 * We can come through here when swapping anonymous
+			 * pages, so we don't necessarily have an inode to track
+			 * for sync.
+			 */
+			if (mapping->host && !on_wblist)
+				sb_mark_inode_writeback(mapping->host);
 		}
 		if (!PageDirty(page))
 			radix_tree_tag_clear(&mapping->page_tree,
@@ -2804,7 +2851,8 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
 	}
 	if (!ret) {
 		mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_WRITEBACK);
-		inc_zone_page_state(page, NR_WRITEBACK);
+		inc_node_page_state(page, NR_WRITEBACK);
+		inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
 	}
 	unlock_page_memcg(page);
 	return ret;
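
The net effect of the node_dirty_limit()/node_dirty_ok() hunks above is that the allocator's dirty throttling check now sums pgdat-wide counters (NR_FILE_DIRTY, NR_UNSTABLE_NFS, NR_WRITEBACK) and compares them against a per-node limit derived from the node's dirtyable memory. The following is a minimal userspace sketch of that arithmetic only, for readers following along; the struct, the sample page counts, and the fixed vm_dirty_ratio are illustrative assumptions, not the kernel's data structures, and the vm_dirty_bytes and PF_LESS_THROTTLE adjustments are deliberately left out.

/*
 * Sketch of the per-node dirty-limit arithmetic from the diff above.
 * All values are hypothetical; this is not kernel code.
 */
#include <stdbool.h>
#include <stdio.h>

struct node_stats {
	unsigned long dirtyable;	/* free + inactive file + active file pages */
	unsigned long file_dirty;	/* NR_FILE_DIRTY */
	unsigned long unstable_nfs;	/* NR_UNSTABLE_NFS */
	unsigned long writeback;	/* NR_WRITEBACK */
};

static unsigned long vm_dirty_ratio = 20;	/* default /proc/sys/vm/dirty_ratio */

/* Mirrors the vm_dirty_ratio branch of node_dirty_limit(). */
static unsigned long node_dirty_limit(const struct node_stats *n)
{
	return vm_dirty_ratio * n->dirtyable / 100;
}

/* Mirrors node_dirty_ok(): sum the node's dirty state, compare to the limit. */
static bool node_dirty_ok(const struct node_stats *n)
{
	unsigned long nr_pages = 0;

	nr_pages += n->file_dirty;
	nr_pages += n->unstable_nfs;
	nr_pages += n->writeback;

	return nr_pages <= node_dirty_limit(n);
}

int main(void)
{
	/* Hypothetical node: ~4GB dirtyable, ~600MB dirty/writeback (4K pages). */
	struct node_stats node = {
		.dirtyable	= 1048576,
		.file_dirty	= 120000,
		.unstable_nfs	= 5000,
		.writeback	= 30000,
	};

	printf("limit=%lu pages, dirty_ok=%d\n",
	       node_dirty_limit(&node), node_dirty_ok(&node));
	return 0;
}

With these sample numbers the limit works out to 209715 pages against 155000 dirty pages, so the check passes; in the kernel the same comparison gates whether the page allocator keeps placing new page-cache pages on this node.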