diff options
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r-- | fs/ext4/inode.c | 534 |
1 files changed, 335 insertions, 199 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index e6218cbc8..250c2df04 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -216,7 +216,6 @@ void ext4_evict_inode(struct inode *inode) } truncate_inode_pages_final(&inode->i_data); - WARN_ON(atomic_read(&EXT4_I(inode)->i_ioend_count)); goto no_delete; } @@ -228,8 +227,6 @@ void ext4_evict_inode(struct inode *inode) ext4_begin_ordered_truncate(inode, 0); truncate_inode_pages_final(&inode->i_data); - WARN_ON(atomic_read(&EXT4_I(inode)->i_ioend_count)); - /* * Protect us against freezing - iput() caller didn't have to have any * protection against it @@ -458,13 +455,13 @@ static void ext4_map_blocks_es_recheck(handle_t *handle, * Otherwise, call with ext4_ind_map_blocks() to handle indirect mapping * based files * - * On success, it returns the number of blocks being mapped or allocated. - * if create==0 and the blocks are pre-allocated and unwritten block, - * the result buffer head is unmapped. If the create ==1, it will make sure - * the buffer head is mapped. + * On success, it returns the number of blocks being mapped or allocated. if + * create==0 and the blocks are pre-allocated and unwritten, the resulting @map + * is marked as unwritten. If the create == 1, it will mark @map as mapped. * * It returns 0 if plain look up failed (blocks have not been allocated), in - * that case, buffer head is unmapped + * that case, @map is returned as unmapped but we still do fill map->m_len to + * indicate the length of a hole starting at map->m_lblk. * * It returns the error in case of allocation failure. */ @@ -507,6 +504,11 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, retval = map->m_len; map->m_len = retval; } else if (ext4_es_is_delayed(&es) || ext4_es_is_hole(&es)) { + map->m_pblk = 0; + retval = es.es_len - (map->m_lblk - es.es_lblk); + if (retval > map->m_len) + retval = map->m_len; + map->m_len = retval; retval = 0; } else { BUG_ON(1); @@ -682,6 +684,21 @@ out_sem: ret = check_block_validity(inode, map); if (ret != 0) return ret; + + /* + * Inodes with freshly allocated blocks where contents will be + * visible after transaction commit must be on transaction's + * ordered data list. + */ + if (map->m_flags & EXT4_MAP_NEW && + !(map->m_flags & EXT4_MAP_UNWRITTEN) && + !(flags & EXT4_GET_BLOCKS_ZERO) && + !IS_NOQUOTA(inode) && + ext4_should_order_data(inode)) { + ret = ext4_jbd2_file_inode(handle, inode); + if (ret) + return ret; + } } return retval; } @@ -714,16 +731,11 @@ static void ext4_update_bh_state(struct buffer_head *bh, unsigned long flags) cmpxchg(&bh->b_state, old_state, new_state) != old_state)); } -/* Maximum number of blocks we map for direct IO at once. */ -#define DIO_MAX_BLOCKS 4096 - static int _ext4_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh, int flags) { - handle_t *handle = ext4_journal_current_handle(); struct ext4_map_blocks map; - int ret = 0, started = 0; - int dio_credits; + int ret = 0; if (ext4_has_inline_data(inode)) return -ERANGE; @@ -731,33 +743,14 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock, map.m_lblk = iblock; map.m_len = bh->b_size >> inode->i_blkbits; - if (flags && !handle) { - /* Direct IO write... */ - if (map.m_len > DIO_MAX_BLOCKS) - map.m_len = DIO_MAX_BLOCKS; - dio_credits = ext4_chunk_trans_blocks(inode, map.m_len); - handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, - dio_credits); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - return ret; - } - started = 1; - } - - ret = ext4_map_blocks(handle, inode, &map, flags); + ret = ext4_map_blocks(ext4_journal_current_handle(), inode, &map, + flags); if (ret > 0) { - ext4_io_end_t *io_end = ext4_inode_aio(inode); - map_bh(bh, inode->i_sb, map.m_pblk); ext4_update_bh_state(bh, map.m_flags); - if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN) - set_buffer_defer_completion(bh); bh->b_size = inode->i_sb->s_blocksize * map.m_len; ret = 0; } - if (started) - ext4_journal_stop(handle); return ret; } @@ -769,6 +762,153 @@ int ext4_get_block(struct inode *inode, sector_t iblock, } /* + * Get block function used when preparing for buffered write if we require + * creating an unwritten extent if blocks haven't been allocated. The extent + * will be converted to written after the IO is complete. + */ +int ext4_get_block_unwritten(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create) +{ + ext4_debug("ext4_get_block_unwritten: inode %lu, create flag %d\n", + inode->i_ino, create); + return _ext4_get_block(inode, iblock, bh_result, + EXT4_GET_BLOCKS_IO_CREATE_EXT); +} + +/* Maximum number of blocks we map for direct IO at once. */ +#define DIO_MAX_BLOCKS 4096 + +/* + * Get blocks function for the cases that need to start a transaction - + * generally difference cases of direct IO and DAX IO. It also handles retries + * in case of ENOSPC. + */ +static int ext4_get_block_trans(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int flags) +{ + int dio_credits; + handle_t *handle; + int retries = 0; + int ret; + + /* Trim mapping request to maximum we can map at once for DIO */ + if (bh_result->b_size >> inode->i_blkbits > DIO_MAX_BLOCKS) + bh_result->b_size = DIO_MAX_BLOCKS << inode->i_blkbits; + dio_credits = ext4_chunk_trans_blocks(inode, + bh_result->b_size >> inode->i_blkbits); +retry: + handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, dio_credits); + if (IS_ERR(handle)) + return PTR_ERR(handle); + + ret = _ext4_get_block(inode, iblock, bh_result, flags); + ext4_journal_stop(handle); + + if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) + goto retry; + return ret; +} + +/* Get block function for DIO reads and writes to inodes without extents */ +int ext4_dio_get_block(struct inode *inode, sector_t iblock, + struct buffer_head *bh, int create) +{ + /* We don't expect handle for direct IO */ + WARN_ON_ONCE(ext4_journal_current_handle()); + + if (!create) + return _ext4_get_block(inode, iblock, bh, 0); + return ext4_get_block_trans(inode, iblock, bh, EXT4_GET_BLOCKS_CREATE); +} + +/* + * Get block function for AIO DIO writes when we create unwritten extent if + * blocks are not allocated yet. The extent will be converted to written + * after IO is complete. + */ +static int ext4_dio_get_block_unwritten_async(struct inode *inode, + sector_t iblock, struct buffer_head *bh_result, int create) +{ + int ret; + + /* We don't expect handle for direct IO */ + WARN_ON_ONCE(ext4_journal_current_handle()); + + ret = ext4_get_block_trans(inode, iblock, bh_result, + EXT4_GET_BLOCKS_IO_CREATE_EXT); + + /* + * When doing DIO using unwritten extents, we need io_end to convert + * unwritten extents to written on IO completion. We allocate io_end + * once we spot unwritten extent and store it in b_private. Generic + * DIO code keeps b_private set and furthermore passes the value to + * our completion callback in 'private' argument. + */ + if (!ret && buffer_unwritten(bh_result)) { + if (!bh_result->b_private) { + ext4_io_end_t *io_end; + + io_end = ext4_init_io_end(inode, GFP_KERNEL); + if (!io_end) + return -ENOMEM; + bh_result->b_private = io_end; + ext4_set_io_unwritten_flag(inode, io_end); + } + set_buffer_defer_completion(bh_result); + } + + return ret; +} + +/* + * Get block function for non-AIO DIO writes when we create unwritten extent if + * blocks are not allocated yet. The extent will be converted to written + * after IO is complete from ext4_ext_direct_IO() function. + */ +static int ext4_dio_get_block_unwritten_sync(struct inode *inode, + sector_t iblock, struct buffer_head *bh_result, int create) +{ + int ret; + + /* We don't expect handle for direct IO */ + WARN_ON_ONCE(ext4_journal_current_handle()); + + ret = ext4_get_block_trans(inode, iblock, bh_result, + EXT4_GET_BLOCKS_IO_CREATE_EXT); + + /* + * Mark inode as having pending DIO writes to unwritten extents. + * ext4_ext_direct_IO() checks this flag and converts extents to + * written. + */ + if (!ret && buffer_unwritten(bh_result)) + ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); + + return ret; +} + +static int ext4_dio_get_block_overwrite(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create) +{ + int ret; + + ext4_debug("ext4_dio_get_block_overwrite: inode %lu, create flag %d\n", + inode->i_ino, create); + /* We don't expect handle for direct IO */ + WARN_ON_ONCE(ext4_journal_current_handle()); + + ret = _ext4_get_block(inode, iblock, bh_result, 0); + /* + * Blocks should have been preallocated! ext4_file_write_iter() checks + * that. + */ + WARN_ON_ONCE(!buffer_mapped(bh_result) || buffer_unwritten(bh_result)); + + return ret; +} + + +/* * `handle' can be NULL if create is zero */ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, @@ -930,7 +1070,7 @@ int do_journal_get_write_access(handle_t *handle, static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len, get_block_t *get_block) { - unsigned from = pos & (PAGE_CACHE_SIZE - 1); + unsigned from = pos & (PAGE_SIZE - 1); unsigned to = from + len; struct inode *inode = page->mapping->host; unsigned block_start, block_end; @@ -942,15 +1082,15 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len, bool decrypt = false; BUG_ON(!PageLocked(page)); - BUG_ON(from > PAGE_CACHE_SIZE); - BUG_ON(to > PAGE_CACHE_SIZE); + BUG_ON(from > PAGE_SIZE); + BUG_ON(to > PAGE_SIZE); BUG_ON(from > to); if (!page_has_buffers(page)) create_empty_buffers(page, blocksize, 0); head = page_buffers(page); bbits = ilog2(blocksize); - block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits); + block = (sector_t)page->index << (PAGE_SHIFT - bbits); for (bh = head, block_start = 0; bh != head || !block_start; block++, block_start = block_end, bh = bh->b_this_page) { @@ -1032,8 +1172,8 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping, * we allocate blocks but write fails for some reason */ needed_blocks = ext4_writepage_trans_blocks(inode) + 1; - index = pos >> PAGE_CACHE_SHIFT; - from = pos & (PAGE_CACHE_SIZE - 1); + index = pos >> PAGE_SHIFT; + from = pos & (PAGE_SIZE - 1); to = from + len; if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) { @@ -1061,7 +1201,7 @@ retry_grab: retry_journal: handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, needed_blocks); if (IS_ERR(handle)) { - page_cache_release(page); + put_page(page); return PTR_ERR(handle); } @@ -1069,7 +1209,7 @@ retry_journal: if (page->mapping != mapping) { /* The page got truncated from under us */ unlock_page(page); - page_cache_release(page); + put_page(page); ext4_journal_stop(handle); goto retry_grab; } @@ -1079,13 +1219,14 @@ retry_journal: #ifdef CONFIG_EXT4_FS_ENCRYPTION if (ext4_should_dioread_nolock(inode)) ret = ext4_block_write_begin(page, pos, len, - ext4_get_block_write); + ext4_get_block_unwritten); else ret = ext4_block_write_begin(page, pos, len, ext4_get_block); #else if (ext4_should_dioread_nolock(inode)) - ret = __block_write_begin(page, pos, len, ext4_get_block_write); + ret = __block_write_begin(page, pos, len, + ext4_get_block_unwritten); else ret = __block_write_begin(page, pos, len, ext4_get_block); #endif @@ -1124,7 +1265,7 @@ retry_journal: if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) goto retry_journal; - page_cache_release(page); + put_page(page); return ret; } *pagep = page; @@ -1163,15 +1304,6 @@ static int ext4_write_end(struct file *file, int i_size_changed = 0; trace_ext4_write_end(inode, pos, len, copied); - if (ext4_test_inode_state(inode, EXT4_STATE_ORDERED_MODE)) { - ret = ext4_jbd2_file_inode(handle, inode); - if (ret) { - unlock_page(page); - page_cache_release(page); - goto errout; - } - } - if (ext4_has_inline_data(inode)) { ret = ext4_write_inline_data_end(inode, pos, len, copied, page); @@ -1187,7 +1319,7 @@ static int ext4_write_end(struct file *file, */ i_size_changed = ext4_update_inode_size(inode, pos + copied); unlock_page(page); - page_cache_release(page); + put_page(page); if (old_size < pos) pagecache_isize_extended(inode, old_size, pos); @@ -1271,7 +1403,7 @@ static int ext4_journalled_write_end(struct file *file, int size_changed = 0; trace_ext4_journalled_write_end(inode, pos, len, copied); - from = pos & (PAGE_CACHE_SIZE - 1); + from = pos & (PAGE_SIZE - 1); to = from + len; BUG_ON(!ext4_handle_valid(handle)); @@ -1295,7 +1427,7 @@ static int ext4_journalled_write_end(struct file *file, ext4_set_inode_state(inode, EXT4_STATE_JDATA); EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid; unlock_page(page); - page_cache_release(page); + put_page(page); if (old_size < pos) pagecache_isize_extended(inode, old_size, pos); @@ -1409,7 +1541,7 @@ static void ext4_da_page_release_reservation(struct page *page, int num_clusters; ext4_fsblk_t lblk; - BUG_ON(stop > PAGE_CACHE_SIZE || stop < length); + BUG_ON(stop > PAGE_SIZE || stop < length); head = page_buffers(page); bh = head; @@ -1425,7 +1557,7 @@ static void ext4_da_page_release_reservation(struct page *page, clear_buffer_delay(bh); } else if (contiguous_blks) { lblk = page->index << - (PAGE_CACHE_SHIFT - inode->i_blkbits); + (PAGE_SHIFT - inode->i_blkbits); lblk += (curr_off >> inode->i_blkbits) - contiguous_blks; ext4_es_remove_extent(inode, lblk, contiguous_blks); @@ -1435,7 +1567,7 @@ static void ext4_da_page_release_reservation(struct page *page, } while ((bh = bh->b_this_page) != head); if (contiguous_blks) { - lblk = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); + lblk = page->index << (PAGE_SHIFT - inode->i_blkbits); lblk += (curr_off >> inode->i_blkbits) - contiguous_blks; ext4_es_remove_extent(inode, lblk, contiguous_blks); } @@ -1444,7 +1576,7 @@ static void ext4_da_page_release_reservation(struct page *page, * need to release the reserved space for that cluster. */ num_clusters = EXT4_NUM_B2C(sbi, to_release); while (num_clusters > 0) { - lblk = (page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits)) + + lblk = (page->index << (PAGE_SHIFT - inode->i_blkbits)) + ((num_clusters - 1) << sbi->s_cluster_bits); if (sbi->s_cluster_ratio == 1 || !ext4_find_delalloc_cluster(inode, lblk)) @@ -1491,8 +1623,8 @@ static void mpage_release_unused_pages(struct mpage_da_data *mpd, end = mpd->next_page - 1; if (invalidate) { ext4_lblk_t start, last; - start = index << (PAGE_CACHE_SHIFT - inode->i_blkbits); - last = end << (PAGE_CACHE_SHIFT - inode->i_blkbits); + start = index << (PAGE_SHIFT - inode->i_blkbits); + last = end << (PAGE_SHIFT - inode->i_blkbits); ext4_es_remove_extent(inode, start, last - start + 1); } @@ -1508,7 +1640,7 @@ static void mpage_release_unused_pages(struct mpage_da_data *mpd, BUG_ON(!PageLocked(page)); BUG_ON(PageWriteback(page)); if (invalidate) { - block_invalidatepage(page, 0, PAGE_CACHE_SIZE); + block_invalidatepage(page, 0, PAGE_SIZE); ClearPageUptodate(page); } unlock_page(page); @@ -1879,10 +2011,10 @@ static int ext4_writepage(struct page *page, trace_ext4_writepage(page); size = i_size_read(inode); - if (page->index == size >> PAGE_CACHE_SHIFT) - len = size & ~PAGE_CACHE_MASK; + if (page->index == size >> PAGE_SHIFT) + len = size & ~PAGE_MASK; else - len = PAGE_CACHE_SIZE; + len = PAGE_SIZE; page_bufs = page_buffers(page); /* @@ -1906,7 +2038,7 @@ static int ext4_writepage(struct page *page, ext4_bh_delay_or_unwritten)) { redirty_page_for_writepage(wbc, page); if ((current->flags & PF_MEMALLOC) || - (inode->i_sb->s_blocksize == PAGE_CACHE_SIZE)) { + (inode->i_sb->s_blocksize == PAGE_SIZE)) { /* * For memory cleaning there's no point in writing only * some buffers. So just bail out. Warn if we came here @@ -1948,10 +2080,10 @@ static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page) int err; BUG_ON(page->index != mpd->first_page); - if (page->index == size >> PAGE_CACHE_SHIFT) - len = size & ~PAGE_CACHE_MASK; + if (page->index == size >> PAGE_SHIFT) + len = size & ~PAGE_MASK; else - len = PAGE_CACHE_SIZE; + len = PAGE_SIZE; clear_page_dirty_for_io(page); err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc, false); if (!err) @@ -2085,7 +2217,7 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd) int nr_pages, i; struct inode *inode = mpd->inode; struct buffer_head *head, *bh; - int bpp_bits = PAGE_CACHE_SHIFT - inode->i_blkbits; + int bpp_bits = PAGE_SHIFT - inode->i_blkbits; pgoff_t start, end; ext4_lblk_t lblk; sector_t pblock; @@ -2146,7 +2278,7 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd) * supports blocksize < pagesize as we will try to * convert potentially unmapped parts of inode. */ - mpd->io_submit.io_end->size += PAGE_CACHE_SIZE; + mpd->io_submit.io_end->size += PAGE_SIZE; /* Page fully mapped - let IO run! */ err = mpage_submit_page(mpd, page); if (err < 0) { @@ -2298,7 +2430,7 @@ update_disksize: * Update on-disk size after IO is submitted. Races with * truncate are avoided by checking i_size under i_data_sem. */ - disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT; + disksize = ((loff_t)mpd->first_page) << PAGE_SHIFT; if (disksize > EXT4_I(inode)->i_disksize) { int err2; loff_t i_size; @@ -2434,7 +2566,7 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) mpd->next_page = page->index + 1; /* Add all dirty buffers to mpd */ lblk = ((ext4_lblk_t)page->index) << - (PAGE_CACHE_SHIFT - blkbits); + (PAGE_SHIFT - blkbits); head = page_buffers(page); err = mpage_process_page_bufs(mpd, head, head, lblk); if (err <= 0) @@ -2519,7 +2651,7 @@ static int ext4_writepages(struct address_space *mapping, * We may need to convert up to one extent per block in * the page and we may dirty the inode. */ - rsv_blocks = 1 + (PAGE_CACHE_SIZE >> inode->i_blkbits); + rsv_blocks = 1 + (PAGE_SIZE >> inode->i_blkbits); } /* @@ -2550,8 +2682,8 @@ static int ext4_writepages(struct address_space *mapping, mpd.first_page = writeback_index; mpd.last_page = -1; } else { - mpd.first_page = wbc->range_start >> PAGE_CACHE_SHIFT; - mpd.last_page = wbc->range_end >> PAGE_CACHE_SHIFT; + mpd.first_page = wbc->range_start >> PAGE_SHIFT; + mpd.last_page = wbc->range_end >> PAGE_SHIFT; } mpd.inode = inode; @@ -2710,7 +2842,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, struct inode *inode = mapping->host; handle_t *handle; - index = pos >> PAGE_CACHE_SHIFT; + index = pos >> PAGE_SHIFT; if (ext4_nonda_switch(inode->i_sb)) { *fsdata = (void *)FALL_BACK_TO_NONDELALLOC; @@ -2753,7 +2885,7 @@ retry_journal: handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, ext4_da_write_credits(inode, pos, len)); if (IS_ERR(handle)) { - page_cache_release(page); + put_page(page); return PTR_ERR(handle); } @@ -2761,7 +2893,7 @@ retry_journal: if (page->mapping != mapping) { /* The page got truncated from under us */ unlock_page(page); - page_cache_release(page); + put_page(page); ext4_journal_stop(handle); goto retry_grab; } @@ -2789,7 +2921,7 @@ retry_journal: ext4_should_retry_alloc(inode->i_sb, &retries)) goto retry_journal; - page_cache_release(page); + put_page(page); return ret; } @@ -2837,7 +2969,7 @@ static int ext4_da_write_end(struct file *file, len, copied, page, fsdata); trace_ext4_da_write_end(inode, pos, len, copied); - start = pos & (PAGE_CACHE_SIZE - 1); + start = pos & (PAGE_SIZE - 1); end = start + copied - 1; /* @@ -3059,7 +3191,7 @@ static int __ext4_journalled_invalidatepage(struct page *page, /* * If it's a full truncate we just forget about the pending dirtying */ - if (offset == 0 && length == PAGE_CACHE_SIZE) + if (offset == 0 && length == PAGE_SIZE) ClearPageChecked(page); return jbd2_journal_invalidatepage(journal, page, offset, length); @@ -3088,37 +3220,6 @@ static int ext4_releasepage(struct page *page, gfp_t wait) return try_to_free_buffers(page); } -/* - * ext4_get_block used when preparing for a DIO write or buffer write. - * We allocate an uinitialized extent if blocks haven't been allocated. - * The extent will be converted to initialized after the IO is complete. - */ -int ext4_get_block_write(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create) -{ - ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n", - inode->i_ino, create); - return _ext4_get_block(inode, iblock, bh_result, - EXT4_GET_BLOCKS_IO_CREATE_EXT); -} - -static int ext4_get_block_overwrite(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create) -{ - int ret; - - ext4_debug("ext4_get_block_overwrite: inode %lu, create flag %d\n", - inode->i_ino, create); - ret = _ext4_get_block(inode, iblock, bh_result, 0); - /* - * Blocks should have been preallocated! ext4_file_write_iter() checks - * that. - */ - WARN_ON_ONCE(!buffer_mapped(bh_result)); - - return ret; -} - #ifdef CONFIG_FS_DAX int ext4_dax_mmap_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) @@ -3179,13 +3280,12 @@ out: WARN_ON_ONCE(ret == 0 && create); if (ret > 0) { map_bh(bh_result, inode->i_sb, map.m_pblk); - bh_result->b_state = (bh_result->b_state & ~EXT4_MAP_FLAGS) | - map.m_flags; /* * At least for now we have to clear BH_New so that DAX code * doesn't attempt to zero blocks again in a racy way. */ - bh_result->b_state &= ~(1 << BH_New); + map.m_flags &= ~EXT4_MAP_NEW; + ext4_update_bh_state(bh_result, map.m_flags); bh_result->b_size = map.m_len << inode->i_blkbits; ret = 0; } @@ -3193,24 +3293,32 @@ out: } #endif -static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, +static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset, ssize_t size, void *private) { - ext4_io_end_t *io_end = iocb->private; + ext4_io_end_t *io_end = private; /* if not async direct IO just return */ if (!io_end) - return; + return 0; ext_debug("ext4_end_io_dio(): io_end 0x%p " "for inode %lu, iocb 0x%p, offset %llu, size %zd\n", - iocb->private, io_end->inode->i_ino, iocb, offset, - size); + io_end, io_end->inode->i_ino, iocb, offset, size); - iocb->private = NULL; + /* + * Error during AIO DIO. We cannot convert unwritten extents as the + * data was not written. Just clear the unwritten flag and drop io_end. + */ + if (size <= 0) { + ext4_clear_io_unwritten_flag(io_end); + size = 0; + } io_end->offset = offset; io_end->size = size; ext4_put_io_end(io_end); + + return 0; } /* @@ -3243,7 +3351,6 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter, get_block_t *get_block_func = NULL; int dio_flags = 0; loff_t final_size = offset + count; - ext4_io_end_t *io_end = NULL; /* Use the old path for reads and writes beyond i_size. */ if (iov_iter_rw(iter) != WRITE || final_size > inode->i_size) @@ -3268,16 +3375,17 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter, /* * We could direct write to holes and fallocate. * - * Allocated blocks to fill the hole are marked as - * unwritten to prevent parallel buffered read to expose - * the stale data before DIO complete the data IO. + * Allocated blocks to fill the hole are marked as unwritten to prevent + * parallel buffered read to expose the stale data before DIO complete + * the data IO. * - * As to previously fallocated extents, ext4 get_block will - * just simply mark the buffer mapped but still keep the - * extents unwritten. + * As to previously fallocated extents, ext4 get_block will just simply + * mark the buffer mapped but still keep the extents unwritten. * - * For non AIO case, we will convert those unwritten extents - * to written after return back from blockdev_direct_IO. + * For non AIO case, we will convert those unwritten extents to written + * after return back from blockdev_direct_IO. That way we save us from + * allocating io_end structure and also the overhead of offloading + * the extent convertion to a workqueue. * * For async DIO, the conversion needs to be deferred when the * IO is completed. The ext4 end_io callback function will be @@ -3285,30 +3393,13 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter, * case, we allocate an io_end structure to hook to the iocb. */ iocb->private = NULL; - if (overwrite) { - get_block_func = ext4_get_block_overwrite; + if (overwrite) + get_block_func = ext4_dio_get_block_overwrite; + else if (is_sync_kiocb(iocb)) { + get_block_func = ext4_dio_get_block_unwritten_sync; + dio_flags = DIO_LOCKING; } else { - ext4_inode_aio_set(inode, NULL); - if (!is_sync_kiocb(iocb)) { - io_end = ext4_init_io_end(inode, GFP_NOFS); - if (!io_end) { - ret = -ENOMEM; - goto retake_lock; - } - /* - * Grab reference for DIO. Will be dropped in - * ext4_end_io_dio() - */ - iocb->private = ext4_get_io_end(io_end); - /* - * we save the io structure for current async direct - * IO, so that later ext4_map_blocks() could flag the - * io structure whether there is a unwritten extents - * needs to be converted when IO is completed. - */ - ext4_inode_aio_set(inode, io_end); - } - get_block_func = ext4_get_block_write; + get_block_func = ext4_dio_get_block_unwritten_async; dio_flags = DIO_LOCKING; } #ifdef CONFIG_EXT4_FS_ENCRYPTION @@ -3323,27 +3414,6 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter, get_block_func, ext4_end_io_dio, NULL, dio_flags); - /* - * Put our reference to io_end. This can free the io_end structure e.g. - * in sync IO case or in case of error. It can even perform extent - * conversion if all bios we submitted finished before we got here. - * Note that in that case iocb->private can be already set to NULL - * here. - */ - if (io_end) { - ext4_inode_aio_set(inode, NULL); - ext4_put_io_end(io_end); - /* - * When no IO was submitted ext4_end_io_dio() was not - * called so we have to put iocb's reference. - */ - if (ret <= 0 && ret != -EIOCBQUEUED && iocb->private) { - WARN_ON(iocb->private != io_end); - WARN_ON(io_end->flag & EXT4_IO_END_UNWRITTEN); - ext4_put_io_end(io_end); - iocb->private = NULL; - } - } if (ret > 0 && !overwrite && ext4_test_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN)) { int err; @@ -3358,7 +3428,6 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter, ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); } -retake_lock: if (iov_iter_rw(iter) == WRITE) inode_dio_end(inode); /* take i_mutex locking again if we do a ovewrite dio */ @@ -3491,8 +3560,8 @@ void ext4_set_aops(struct inode *inode) static int __ext4_block_zero_page_range(handle_t *handle, struct address_space *mapping, loff_t from, loff_t length) { - ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; - unsigned offset = from & (PAGE_CACHE_SIZE-1); + ext4_fsblk_t index = from >> PAGE_SHIFT; + unsigned offset = from & (PAGE_SIZE-1); unsigned blocksize, pos; ext4_lblk_t iblock; struct inode *inode = mapping->host; @@ -3500,14 +3569,14 @@ static int __ext4_block_zero_page_range(handle_t *handle, struct page *page; int err = 0; - page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT, + page = find_or_create_page(mapping, from >> PAGE_SHIFT, mapping_gfp_constraint(mapping, ~__GFP_FS)); if (!page) return -ENOMEM; blocksize = inode->i_sb->s_blocksize; - iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); + iblock = index << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits); if (!page_has_buffers(page)) create_empty_buffers(page, blocksize, 0); @@ -3549,7 +3618,7 @@ static int __ext4_block_zero_page_range(handle_t *handle, ext4_encrypted_inode(inode)) { /* We expect the key to be set. */ BUG_ON(!ext4_has_encryption_key(inode)); - BUG_ON(blocksize != PAGE_CACHE_SIZE); + BUG_ON(blocksize != PAGE_SIZE); WARN_ON_ONCE(ext4_decrypt(page)); } } @@ -3573,7 +3642,7 @@ static int __ext4_block_zero_page_range(handle_t *handle, unlock: unlock_page(page); - page_cache_release(page); + put_page(page); return err; } @@ -3588,7 +3657,7 @@ static int ext4_block_zero_page_range(handle_t *handle, struct address_space *mapping, loff_t from, loff_t length) { struct inode *inode = mapping->host; - unsigned offset = from & (PAGE_CACHE_SIZE-1); + unsigned offset = from & (PAGE_SIZE-1); unsigned blocksize = inode->i_sb->s_blocksize; unsigned max = blocksize - (offset & (blocksize - 1)); @@ -3613,7 +3682,7 @@ static int ext4_block_zero_page_range(handle_t *handle, static int ext4_block_truncate_page(handle_t *handle, struct address_space *mapping, loff_t from) { - unsigned offset = from & (PAGE_CACHE_SIZE-1); + unsigned offset = from & (PAGE_SIZE-1); unsigned length; unsigned blocksize; struct inode *inode = mapping->host; @@ -3751,7 +3820,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) */ if (offset + length > inode->i_size) { length = inode->i_size + - PAGE_CACHE_SIZE - (inode->i_size & (PAGE_CACHE_SIZE - 1)) - + PAGE_SIZE - (inode->i_size & (PAGE_SIZE - 1)) - offset; } @@ -4826,23 +4895,23 @@ static void ext4_wait_for_tail_page_commit(struct inode *inode) tid_t commit_tid = 0; int ret; - offset = inode->i_size & (PAGE_CACHE_SIZE - 1); + offset = inode->i_size & (PAGE_SIZE - 1); /* * All buffers in the last page remain valid? Then there's nothing to - * do. We do the check mainly to optimize the common PAGE_CACHE_SIZE == + * do. We do the check mainly to optimize the common PAGE_SIZE == * blocksize case */ - if (offset > PAGE_CACHE_SIZE - (1 << inode->i_blkbits)) + if (offset > PAGE_SIZE - (1 << inode->i_blkbits)) return; while (1) { page = find_lock_page(inode->i_mapping, - inode->i_size >> PAGE_CACHE_SHIFT); + inode->i_size >> PAGE_SHIFT); if (!page) return; ret = __ext4_journalled_invalidatepage(page, offset, - PAGE_CACHE_SIZE - offset); + PAGE_SIZE - offset); unlock_page(page); - page_cache_release(page); + put_page(page); if (ret != -EBUSY) return; commit_tid = 0; @@ -5481,10 +5550,10 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) goto out; } - if (page->index == size >> PAGE_CACHE_SHIFT) - len = size & ~PAGE_CACHE_MASK; + if (page->index == size >> PAGE_SHIFT) + len = size & ~PAGE_MASK; else - len = PAGE_CACHE_SIZE; + len = PAGE_SIZE; /* * Return if we have all the buffers mapped. This avoids the need to do * journal_start/journal_stop which can block and take a long time @@ -5502,7 +5571,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) unlock_page(page); /* OK, we need to fill the hole... */ if (ext4_should_dioread_nolock(inode)) - get_block = ext4_get_block_write; + get_block = ext4_get_block_unwritten; else get_block = ext4_get_block; retry_alloc: @@ -5515,7 +5584,7 @@ retry_alloc: ret = block_page_mkwrite(vma, vmf, get_block); if (!ret && ext4_should_journal_data(inode)) { if (ext4_walk_page_buffers(handle, page_buffers(page), 0, - PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) { + PAGE_SIZE, NULL, do_journal_get_write_access)) { unlock_page(page); ret = VM_FAULT_SIGBUS; ext4_journal_stop(handle); @@ -5545,3 +5614,70 @@ int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) return err; } + +/* + * Find the first extent at or after @lblk in an inode that is not a hole. + * Search for @map_len blocks at most. The extent is returned in @result. + * + * The function returns 1 if we found an extent. The function returns 0 in + * case there is no extent at or after @lblk and in that case also sets + * @result->es_len to 0. In case of error, the error code is returned. + */ +int ext4_get_next_extent(struct inode *inode, ext4_lblk_t lblk, + unsigned int map_len, struct extent_status *result) +{ + struct ext4_map_blocks map; + struct extent_status es = {}; + int ret; + + map.m_lblk = lblk; + map.m_len = map_len; + + /* + * For non-extent based files this loop may iterate several times since + * we do not determine full hole size. + */ + while (map.m_len > 0) { + ret = ext4_map_blocks(NULL, inode, &map, 0); + if (ret < 0) + return ret; + /* There's extent covering m_lblk? Just return it. */ + if (ret > 0) { + int status; + + ext4_es_store_pblock(result, map.m_pblk); + result->es_lblk = map.m_lblk; + result->es_len = map.m_len; + if (map.m_flags & EXT4_MAP_UNWRITTEN) + status = EXTENT_STATUS_UNWRITTEN; + else + status = EXTENT_STATUS_WRITTEN; + ext4_es_store_status(result, status); + return 1; + } + ext4_es_find_delayed_extent_range(inode, map.m_lblk, + map.m_lblk + map.m_len - 1, + &es); + /* Is delalloc data before next block in extent tree? */ + if (es.es_len && es.es_lblk < map.m_lblk + map.m_len) { + ext4_lblk_t offset = 0; + + if (es.es_lblk < lblk) + offset = lblk - es.es_lblk; + result->es_lblk = es.es_lblk + offset; + ext4_es_store_pblock(result, + ext4_es_pblock(&es) + offset); + result->es_len = es.es_len - offset; + ext4_es_store_status(result, ext4_es_status(&es)); + + return 1; + } + /* There's a hole at m_lblk, advance us after it */ + map.m_lblk += map.m_len; + map_len -= map.m_len; + map.m_len = map_len; + cond_resched(); + } + result->es_len = 0; + return 0; +} |