summaryrefslogtreecommitdiff
path: root/fs/f2fs/node.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/f2fs/node.c')
-rw-r--r--fs/f2fs/node.c233
1 files changed, 161 insertions, 72 deletions
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 342597a58..1a33de9d8 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -46,11 +46,11 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
*/
if (type == FREE_NIDS) {
mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >>
- PAGE_CACHE_SHIFT;
+ PAGE_SHIFT;
res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
} else if (type == NAT_ENTRIES) {
mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >>
- PAGE_CACHE_SHIFT;
+ PAGE_SHIFT;
res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
} else if (type == DIRTY_DENTS) {
if (sbi->sb->s_bdi->wb.dirty_exceeded)
@@ -62,13 +62,13 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
for (i = 0; i <= UPDATE_INO; i++)
mem_size += (sbi->im[i].ino_num *
- sizeof(struct ino_entry)) >> PAGE_CACHE_SHIFT;
+ sizeof(struct ino_entry)) >> PAGE_SHIFT;
res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
} else if (type == EXTENT_CACHE) {
mem_size = (atomic_read(&sbi->total_ext_tree) *
sizeof(struct extent_tree) +
atomic_read(&sbi->total_ext_node) *
- sizeof(struct extent_node)) >> PAGE_CACHE_SHIFT;
+ sizeof(struct extent_node)) >> PAGE_SHIFT;
res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
} else {
if (!sbi->sb->s_bdi->wb.dirty_exceeded)
@@ -121,7 +121,7 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
src_addr = page_address(src_page);
dst_addr = page_address(dst_page);
- memcpy(dst_addr, src_addr, PAGE_CACHE_SIZE);
+ memcpy(dst_addr, src_addr, PAGE_SIZE);
set_page_dirty(dst_page);
f2fs_put_page(src_page, 1);
@@ -257,15 +257,20 @@ static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
return new;
}
-static void cache_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid,
+static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid,
struct f2fs_nat_entry *ne)
{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
struct nat_entry *e;
e = __lookup_nat_cache(nm_i, nid);
if (!e) {
e = grab_nat_entry(nm_i, nid);
node_info_from_raw_nat(&e->ni, ne);
+ } else {
+ f2fs_bug_on(sbi, nat_get_ino(e) != ne->ino ||
+ nat_get_blkaddr(e) != ne->block_addr ||
+ nat_get_version(e) != ne->version);
}
}
@@ -354,7 +359,7 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
- struct f2fs_summary_block *sum = curseg->sum_blk;
+ struct f2fs_journal *journal = curseg->journal;
nid_t start_nid = START_NID(nid);
struct f2fs_nat_block *nat_blk;
struct page *page = NULL;
@@ -371,23 +376,20 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
ni->ino = nat_get_ino(e);
ni->blk_addr = nat_get_blkaddr(e);
ni->version = nat_get_version(e);
- }
- up_read(&nm_i->nat_tree_lock);
- if (e)
+ up_read(&nm_i->nat_tree_lock);
return;
+ }
memset(&ne, 0, sizeof(struct f2fs_nat_entry));
- down_write(&nm_i->nat_tree_lock);
-
/* Check current segment summary */
- mutex_lock(&curseg->curseg_mutex);
- i = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 0);
+ down_read(&curseg->journal_rwsem);
+ i = lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0);
if (i >= 0) {
- ne = nat_in_journal(sum, i);
+ ne = nat_in_journal(journal, i);
node_info_from_raw_nat(ni, &ne);
}
- mutex_unlock(&curseg->curseg_mutex);
+ up_read(&curseg->journal_rwsem);
if (i >= 0)
goto cache;
@@ -398,19 +400,52 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
node_info_from_raw_nat(ni, &ne);
f2fs_put_page(page, 1);
cache:
+ up_read(&nm_i->nat_tree_lock);
/* cache nat entry */
- cache_nat_entry(NM_I(sbi), nid, &ne);
+ down_write(&nm_i->nat_tree_lock);
+ cache_nat_entry(sbi, nid, &ne);
up_write(&nm_i->nat_tree_lock);
}
+pgoff_t get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs)
+{
+ const long direct_index = ADDRS_PER_INODE(dn->inode);
+ const long direct_blks = ADDRS_PER_BLOCK;
+ const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK;
+ unsigned int skipped_unit = ADDRS_PER_BLOCK;
+ int cur_level = dn->cur_level;
+ int max_level = dn->max_level;
+ pgoff_t base = 0;
+
+ if (!dn->max_level)
+ return pgofs + 1;
+
+ while (max_level-- > cur_level)
+ skipped_unit *= NIDS_PER_BLOCK;
+
+ switch (dn->max_level) {
+ case 3:
+ base += 2 * indirect_blks;
+ case 2:
+ base += 2 * direct_blks;
+ case 1:
+ base += direct_index;
+ break;
+ default:
+ f2fs_bug_on(F2FS_I_SB(dn->inode), 1);
+ }
+
+ return ((pgofs - base) / skipped_unit + 1) * skipped_unit + base;
+}
+
/*
* The maximum depth is four.
* Offset[0] will have raw inode offset.
*/
-static int get_node_path(struct f2fs_inode_info *fi, long block,
+static int get_node_path(struct inode *inode, long block,
int offset[4], unsigned int noffset[4])
{
- const long direct_index = ADDRS_PER_INODE(fi);
+ const long direct_index = ADDRS_PER_INODE(inode);
const long direct_blks = ADDRS_PER_BLOCK;
const long dptrs_per_blk = NIDS_PER_BLOCK;
const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK;
@@ -495,10 +530,10 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
int offset[4];
unsigned int noffset[4];
nid_t nids[4];
- int level, i;
+ int level, i = 0;
int err = 0;
- level = get_node_path(F2FS_I(dn->inode), index, offset, noffset);
+ level = get_node_path(dn->inode, index, offset, noffset);
nids[0] = dn->inode->i_ino;
npage[0] = dn->inode_page;
@@ -585,6 +620,10 @@ release_pages:
release_out:
dn->inode_page = NULL;
dn->node_page = NULL;
+ if (err == -ENOENT) {
+ dn->cur_level = i;
+ dn->max_level = level;
+ }
return err;
}
@@ -792,7 +831,7 @@ int truncate_inode_blocks(struct inode *inode, pgoff_t from)
trace_f2fs_truncate_inode_blocks_enter(inode, from);
- level = get_node_path(F2FS_I(inode), from, offset, noffset);
+ level = get_node_path(inode, from, offset, noffset);
restart:
page = get_node_page(sbi, inode->i_ino);
if (IS_ERR(page)) {
@@ -861,7 +900,7 @@ skip_partial:
f2fs_put_page(page, 1);
goto restart;
}
- f2fs_wait_on_page_writeback(page, NODE);
+ f2fs_wait_on_page_writeback(page, NODE, true);
ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0;
set_page_dirty(page);
unlock_page(page);
@@ -976,7 +1015,7 @@ struct page *new_node_page(struct dnode_of_data *dn,
new_ni.ino = dn->inode->i_ino;
set_node_addr(sbi, &new_ni, NEW_ADDR, false);
- f2fs_wait_on_page_writeback(page, NODE);
+ f2fs_wait_on_page_writeback(page, NODE, true);
fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
set_cold_node(dn->inode, page);
SetPageUptodate(page);
@@ -1029,7 +1068,7 @@ static int read_node_page(struct page *page, int rw)
if (PageUptodate(page))
return LOCKED_PAGE;
- fio.blk_addr = ni.blk_addr;
+ fio.new_blkaddr = fio.old_blkaddr = ni.blk_addr;
return f2fs_submit_page_bio(&fio);
}
@@ -1045,12 +1084,11 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
return;
f2fs_bug_on(sbi, check_nid_range(sbi, nid));
- apage = find_get_page(NODE_MAPPING(sbi), nid);
- if (apage && PageUptodate(apage)) {
- f2fs_put_page(apage, 0);
+ rcu_read_lock();
+ apage = radix_tree_lookup(&NODE_MAPPING(sbi)->page_tree, nid);
+ rcu_read_unlock();
+ if (apage)
return;
- }
- f2fs_put_page(apage, 0);
apage = grab_cache_page(NODE_MAPPING(sbi), nid);
if (!apage)
@@ -1063,7 +1101,7 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
/*
* readahead MAX_RA_NODE number of node pages.
*/
-void ra_node_pages(struct page *parent, int start)
+static void ra_node_pages(struct page *parent, int start)
{
struct f2fs_sb_info *sbi = F2FS_P_SB(parent);
struct blk_plug plug;
@@ -1083,7 +1121,7 @@ void ra_node_pages(struct page *parent, int start)
blk_finish_plug(&plug);
}
-struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid,
+static struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid,
struct page *parent, int start)
{
struct page *page;
@@ -1154,19 +1192,57 @@ void sync_inode_page(struct dnode_of_data *dn)
dn->node_changed = ret ? true: false;
}
+static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino)
+{
+ struct inode *inode;
+ struct page *page;
+
+ /* should flush inline_data before evict_inode */
+ inode = ilookup(sbi->sb, ino);
+ if (!inode)
+ return;
+
+ page = pagecache_get_page(inode->i_mapping, 0, FGP_NOWAIT, 0);
+ if (!page)
+ goto iput_out;
+
+ if (!trylock_page(page))
+ goto release_out;
+
+ if (!PageUptodate(page))
+ goto page_out;
+
+ if (!PageDirty(page))
+ goto page_out;
+
+ if (!clear_page_dirty_for_io(page))
+ goto page_out;
+
+ if (!f2fs_write_inline_data(inode, page))
+ inode_dec_dirty_pages(inode);
+ else
+ set_page_dirty(page);
+page_out:
+ unlock_page(page);
+release_out:
+ f2fs_put_page(page, 0);
+iput_out:
+ iput(inode);
+}
+
int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino,
struct writeback_control *wbc)
{
pgoff_t index, end;
struct pagevec pvec;
int step = ino ? 2 : 0;
- int nwritten = 0, wrote = 0;
+ int nwritten = 0;
pagevec_init(&pvec, 0);
next_step:
index = 0;
- end = LONG_MAX;
+ end = ULONG_MAX;
while (index <= end) {
int i, nr_pages;
@@ -1203,6 +1279,7 @@ next_step:
* If an fsync mode,
* we should not skip writing node pages.
*/
+lock_node:
if (ino && ino_of_node(page) == ino)
lock_page(page);
else if (!trylock_page(page))
@@ -1221,6 +1298,17 @@ continue_unlock:
goto continue_unlock;
}
+ /* flush inline_data */
+ if (!ino && is_inline_node(page)) {
+ clear_inline_node(page);
+ unlock_page(page);
+ flush_inline_data(sbi, ino_of_node(page));
+ goto lock_node;
+ }
+
+ f2fs_wait_on_page_writeback(page, NODE, true);
+
+ BUG_ON(PageWriteback(page));
if (!clear_page_dirty_for_io(page))
goto continue_unlock;
@@ -1238,8 +1326,6 @@ continue_unlock:
if (NODE_MAPPING(sbi)->a_ops->writepage(page, wbc))
unlock_page(page);
- else
- wrote++;
if (--wbc->nr_to_write == 0)
break;
@@ -1257,15 +1343,12 @@ continue_unlock:
step++;
goto next_step;
}
-
- if (wrote)
- f2fs_submit_merged_bio(sbi, NODE, WRITE);
return nwritten;
}
int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino)
{
- pgoff_t index = 0, end = LONG_MAX;
+ pgoff_t index = 0, end = ULONG_MAX;
struct pagevec pvec;
int ret2 = 0, ret = 0;
@@ -1287,7 +1370,7 @@ int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino)
continue;
if (ino && ino_of_node(page) == ino) {
- f2fs_wait_on_page_writeback(page, NODE);
+ f2fs_wait_on_page_writeback(page, NODE, true);
if (TestClearPageError(page))
ret = -EIO;
}
@@ -1326,8 +1409,6 @@ static int f2fs_write_node_page(struct page *page,
if (unlikely(f2fs_cp_error(sbi)))
goto redirty_out;
- f2fs_wait_on_page_writeback(page, NODE);
-
/* get old block addr of this node page */
nid = nid_of_node(page);
f2fs_bug_on(sbi, page->index != nid);
@@ -1351,14 +1432,18 @@ static int f2fs_write_node_page(struct page *page,
}
set_page_writeback(page);
- fio.blk_addr = ni.blk_addr;
+ fio.old_blkaddr = ni.blk_addr;
write_node_page(nid, &fio);
- set_node_addr(sbi, &ni, fio.blk_addr, is_fsync_dnode(page));
+ set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(page));
dec_page_count(sbi, F2FS_DIRTY_NODES);
up_read(&sbi->node_write);
+
+ if (wbc->for_reclaim)
+ f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, NODE, WRITE);
+
unlock_page(page);
- if (wbc->for_reclaim || unlikely(f2fs_cp_error(sbi)))
+ if (unlikely(f2fs_cp_error(sbi)))
f2fs_submit_merged_bio(sbi, NODE, WRITE);
return 0;
@@ -1374,8 +1459,6 @@ static int f2fs_write_node_pages(struct address_space *mapping,
struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
long diff;
- trace_f2fs_writepages(mapping->host, wbc, NODE);
-
/* balancing f2fs's metadata in background */
f2fs_balance_fs_bg(sbi);
@@ -1383,6 +1466,8 @@ static int f2fs_write_node_pages(struct address_space *mapping,
if (get_pages(sbi, F2FS_DIRTY_NODES) < nr_pages_to_skip(sbi, NODE))
goto skip_write;
+ trace_f2fs_writepages(mapping->host, wbc, NODE);
+
diff = nr_pages_to_write(sbi, NODE, wbc);
wbc->sync_mode = WB_SYNC_NONE;
sync_node_pages(sbi, 0, wbc);
@@ -1391,6 +1476,7 @@ static int f2fs_write_node_pages(struct address_space *mapping,
skip_write:
wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_NODES);
+ trace_f2fs_writepages(mapping->host, wbc, NODE);
return 0;
}
@@ -1526,7 +1612,7 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
- struct f2fs_summary_block *sum = curseg->sum_blk;
+ struct f2fs_journal *journal = curseg->journal;
int i = 0;
nid_t nid = nm_i->next_scan_nid;
@@ -1558,16 +1644,18 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
nm_i->next_scan_nid = nid;
/* find free nids from current sum_pages */
- mutex_lock(&curseg->curseg_mutex);
- for (i = 0; i < nats_in_cursum(sum); i++) {
- block_t addr = le32_to_cpu(nat_in_journal(sum, i).block_addr);
- nid = le32_to_cpu(nid_in_journal(sum, i));
+ down_read(&curseg->journal_rwsem);
+ for (i = 0; i < nats_in_cursum(journal); i++) {
+ block_t addr;
+
+ addr = le32_to_cpu(nat_in_journal(journal, i).block_addr);
+ nid = le32_to_cpu(nid_in_journal(journal, i));
if (addr == NULL_ADDR)
add_free_nid(sbi, nid, true);
else
remove_free_nid(nm_i, nid);
}
- mutex_unlock(&curseg->curseg_mutex);
+ up_read(&curseg->journal_rwsem);
up_read(&nm_i->nat_tree_lock);
ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid),
@@ -1703,7 +1791,7 @@ void recover_inline_xattr(struct inode *inode, struct page *page)
src_addr = inline_xattr_addr(page);
inline_size = inline_xattr_size(inode);
- f2fs_wait_on_page_writeback(ipage, NODE);
+ f2fs_wait_on_page_writeback(ipage, NODE, true);
memcpy(dst_addr, src_addr, inline_size);
update_inode:
update_inode(inode, ipage);
@@ -1831,16 +1919,16 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
- struct f2fs_summary_block *sum = curseg->sum_blk;
+ struct f2fs_journal *journal = curseg->journal;
int i;
- mutex_lock(&curseg->curseg_mutex);
- for (i = 0; i < nats_in_cursum(sum); i++) {
+ down_write(&curseg->journal_rwsem);
+ for (i = 0; i < nats_in_cursum(journal); i++) {
struct nat_entry *ne;
struct f2fs_nat_entry raw_ne;
- nid_t nid = le32_to_cpu(nid_in_journal(sum, i));
+ nid_t nid = le32_to_cpu(nid_in_journal(journal, i));
- raw_ne = nat_in_journal(sum, i);
+ raw_ne = nat_in_journal(journal, i);
ne = __lookup_nat_cache(nm_i, nid);
if (!ne) {
@@ -1849,8 +1937,8 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
}
__set_nat_cache_dirty(nm_i, ne);
}
- update_nats_in_cursum(sum, -i);
- mutex_unlock(&curseg->curseg_mutex);
+ update_nats_in_cursum(journal, -i);
+ up_write(&curseg->journal_rwsem);
}
static void __adjust_nat_entry_set(struct nat_entry_set *nes,
@@ -1875,7 +1963,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
struct nat_entry_set *set)
{
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
- struct f2fs_summary_block *sum = curseg->sum_blk;
+ struct f2fs_journal *journal = curseg->journal;
nid_t start_nid = set->set * NAT_ENTRY_PER_BLOCK;
bool to_journal = true;
struct f2fs_nat_block *nat_blk;
@@ -1887,11 +1975,11 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
* #1, flush nat entries to journal in current hot data summary block.
* #2, flush nat entries to nat page.
*/
- if (!__has_cursum_space(sum, set->entry_cnt, NAT_JOURNAL))
+ if (!__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
to_journal = false;
if (to_journal) {
- mutex_lock(&curseg->curseg_mutex);
+ down_write(&curseg->journal_rwsem);
} else {
page = get_next_nat_page(sbi, start_nid);
nat_blk = page_address(page);
@@ -1908,11 +1996,11 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
continue;
if (to_journal) {
- offset = lookup_journal_in_cursum(sum,
+ offset = lookup_journal_in_cursum(journal,
NAT_JOURNAL, nid, 1);
f2fs_bug_on(sbi, offset < 0);
- raw_ne = &nat_in_journal(sum, offset);
- nid_in_journal(sum, offset) = cpu_to_le32(nid);
+ raw_ne = &nat_in_journal(journal, offset);
+ nid_in_journal(journal, offset) = cpu_to_le32(nid);
} else {
raw_ne = &nat_blk->entries[nid - start_nid];
}
@@ -1924,7 +2012,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
}
if (to_journal)
- mutex_unlock(&curseg->curseg_mutex);
+ up_write(&curseg->journal_rwsem);
else
f2fs_put_page(page, 1);
@@ -1941,7 +2029,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
- struct f2fs_summary_block *sum = curseg->sum_blk;
+ struct f2fs_journal *journal = curseg->journal;
struct nat_entry_set *setvec[SETVEC_SIZE];
struct nat_entry_set *set, *tmp;
unsigned int found;
@@ -1958,7 +2046,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
* entries, remove all entries from journal and merge them
* into nat entry set.
*/
- if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL))
+ if (!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
remove_nats_in_journal(sbi);
while ((found = __gang_lookup_nat_set(nm_i,
@@ -1967,7 +2055,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
set_idx = setvec[found - 1]->set + 1;
for (idx = 0; idx < found; idx++)
__adjust_nat_entry_set(setvec[idx], &sets,
- MAX_NAT_JENTRIES(sum));
+ MAX_NAT_JENTRIES(journal));
}
/* flush dirty nats in nat entry set */
@@ -2000,6 +2088,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
nm_i->nat_cnt = 0;
nm_i->ram_thresh = DEF_RAM_THRESHOLD;
nm_i->ra_nid_pages = DEF_RA_NID_PAGES;
+ nm_i->dirty_nats_ratio = DEF_DIRTY_NAT_RATIO_THRESHOLD;
INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC);
INIT_LIST_HEAD(&nm_i->free_nid_list);