diff options
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r-- | fs/btrfs/disk-io.c | 130 |
1 files changed, 73 insertions, 57 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d8d68af5a..4e47849d7 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -25,7 +25,6 @@ #include <linux/buffer_head.h> #include <linux/workqueue.h> #include <linux/kthread.h> -#include <linux/freezer.h> #include <linux/slab.h> #include <linux/migrate.h> #include <linux/ratelimit.h> @@ -50,6 +49,7 @@ #include "raid56.h" #include "sysfs.h" #include "qgroup.h" +#include "compression.h" #ifdef CONFIG_X86 #include <asm/cpufeature.h> @@ -110,8 +110,7 @@ int __init btrfs_end_io_wq_init(void) void btrfs_end_io_wq_exit(void) { - if (btrfs_end_io_wq_cache) - kmem_cache_destroy(btrfs_end_io_wq_cache); + kmem_cache_destroy(btrfs_end_io_wq_cache); } /* @@ -303,7 +302,7 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info, err = map_private_extent_buffer(buf, offset, 32, &kaddr, &map_start, &map_len); if (err) - return 1; + return err; cur_len = min(len, map_len - (offset - map_start)); crc = btrfs_csum_data(kaddr + offset - map_start, crc, cur_len); @@ -313,7 +312,7 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info, if (csum_size > sizeof(inline_result)) { result = kzalloc(csum_size, GFP_NOFS); if (!result) - return 1; + return -ENOMEM; } else { result = (char *)&inline_result; } @@ -334,7 +333,7 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info, val, found, btrfs_header_level(buf)); if (result != (char *)&inline_result) kfree(result); - return 1; + return -EUCLEAN; } } else { write_extent_buffer(buf, result, 0, csum_size); @@ -513,11 +512,21 @@ static int csum_dirty_buffer(struct btrfs_fs_info *fs_info, struct page *page) eb = (struct extent_buffer *)page->private; if (page != eb->pages[0]) return 0; + found_start = btrfs_header_bytenr(eb); - if (WARN_ON(found_start != start || !PageUptodate(page))) - return 0; - csum_tree_block(fs_info, eb, 0); - return 0; + /* + * Please do not consolidate these warnings into a single if. + * It is useful to know what went wrong. + */ + if (WARN_ON(found_start != start)) + return -EUCLEAN; + if (WARN_ON(!PageUptodate(page))) + return -EUCLEAN; + + ASSERT(memcmp_extent_buffer(eb, fs_info->fsid, + btrfs_header_fsid(), BTRFS_FSID_SIZE) == 0); + + return csum_tree_block(fs_info, eb, 0); } static int check_tree_block_fsid(struct btrfs_fs_info *fs_info, @@ -612,6 +621,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, int found_level; struct extent_buffer *eb; struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; + struct btrfs_fs_info *fs_info = root->fs_info; int ret = 0; int reads_done; @@ -637,21 +647,21 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, found_start = btrfs_header_bytenr(eb); if (found_start != eb->start) { - btrfs_err_rl(eb->fs_info, "bad tree block start %llu %llu", - found_start, eb->start); + btrfs_err_rl(fs_info, "bad tree block start %llu %llu", + found_start, eb->start); ret = -EIO; goto err; } - if (check_tree_block_fsid(root->fs_info, eb)) { - btrfs_err_rl(eb->fs_info, "bad fsid on block %llu", - eb->start); + if (check_tree_block_fsid(fs_info, eb)) { + btrfs_err_rl(fs_info, "bad fsid on block %llu", + eb->start); ret = -EIO; goto err; } found_level = btrfs_header_level(eb); if (found_level >= BTRFS_MAX_LEVEL) { - btrfs_err(root->fs_info, "bad tree block level %d", - (int)btrfs_header_level(eb)); + btrfs_err(fs_info, "bad tree block level %d", + (int)btrfs_header_level(eb)); ret = -EIO; goto err; } @@ -659,11 +669,9 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb), eb, found_level); - ret = csum_tree_block(root->fs_info, eb, 1); - if (ret) { - ret = -EIO; + ret = csum_tree_block(fs_info, eb, 1); + if (ret) goto err; - } /* * If this is a leaf block and it is corrupt, set the corrupt bit so @@ -680,7 +688,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, err: if (reads_done && test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) - btree_readahead_hook(root, eb, eb->start, ret); + btree_readahead_hook(fs_info, eb, eb->start, ret); if (ret) { /* @@ -699,14 +707,13 @@ out: static int btree_io_failed_hook(struct page *page, int failed_mirror) { struct extent_buffer *eb; - struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; eb = (struct extent_buffer *)page->private; set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags); eb->read_mirror = failed_mirror; atomic_dec(&eb->io_pages); if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) - btree_readahead_hook(root, eb, eb->start, -EIO); + btree_readahead_hook(eb->fs_info, eb, eb->start, -EIO); return -EIO; /* we fixed nothing */ } @@ -816,7 +823,7 @@ static void run_one_async_done(struct btrfs_work *work) waitqueue_active(&fs_info->async_submit_wait)) wake_up(&fs_info->async_submit_wait); - /* If an error occured we just want to clean up the bio and move on */ + /* If an error occurred we just want to clean up the bio and move on */ if (async->error) { async->bio->bi_error = async->error; bio_endio(async->bio); @@ -931,7 +938,7 @@ static int check_async_write(struct inode *inode, unsigned long bio_flags) if (bio_flags & EXTENT_BIO_TREE_LOG) return 0; #ifdef CONFIG_X86 - if (static_cpu_has_safe(X86_FEATURE_XMM4_2)) + if (static_cpu_has(X86_FEATURE_XMM4_2)) return 0; #endif return 1; @@ -1055,7 +1062,7 @@ static void btree_invalidatepage(struct page *page, unsigned int offset, (unsigned long long)page_offset(page)); ClearPagePrivate(page); set_page_private(page, 0); - page_cache_release(page); + put_page(page); } } @@ -1296,9 +1303,10 @@ static void __setup_root(u32 nodesize, u32 sectorsize, u32 stripesize, spin_lock_init(&root->root_item_lock); } -static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info) +static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info, + gfp_t flags) { - struct btrfs_root *root = kzalloc(sizeof(*root), GFP_NOFS); + struct btrfs_root *root = kzalloc(sizeof(*root), flags); if (root) root->fs_info = fs_info; return root; @@ -1310,7 +1318,7 @@ struct btrfs_root *btrfs_alloc_dummy_root(void) { struct btrfs_root *root; - root = btrfs_alloc_root(NULL); + root = btrfs_alloc_root(NULL, GFP_KERNEL); if (!root) return ERR_PTR(-ENOMEM); __setup_root(4096, 4096, 4096, root, NULL, 1); @@ -1332,7 +1340,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, int ret = 0; uuid_le uuid; - root = btrfs_alloc_root(fs_info); + root = btrfs_alloc_root(fs_info, GFP_KERNEL); if (!root) return ERR_PTR(-ENOMEM); @@ -1408,7 +1416,7 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, struct btrfs_root *tree_root = fs_info->tree_root; struct extent_buffer *leaf; - root = btrfs_alloc_root(fs_info); + root = btrfs_alloc_root(fs_info, GFP_NOFS); if (!root) return ERR_PTR(-ENOMEM); @@ -1506,7 +1514,7 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, if (!path) return ERR_PTR(-ENOMEM); - root = btrfs_alloc_root(fs_info); + root = btrfs_alloc_root(fs_info, GFP_NOFS); if (!root) { ret = -ENOMEM; goto alloc_fail; @@ -1756,7 +1764,7 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) if (err) return err; - bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE; + bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_SIZE; bdi->congested_fn = btrfs_congested_fn; bdi->congested_data = info; bdi->capabilities |= BDI_CAP_CGROUP_WRITEBACK; @@ -1920,14 +1928,12 @@ sleep: if (unlikely(test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))) btrfs_cleanup_transaction(root); - if (!try_to_freeze()) { - set_current_state(TASK_INTERRUPTIBLE); - if (!kthread_should_stop() && - (!btrfs_transaction_blocked(root->fs_info) || - cannot_commit)) - schedule_timeout(delay); - __set_current_state(TASK_RUNNING); - } + set_current_state(TASK_INTERRUPTIBLE); + if (!kthread_should_stop() && + (!btrfs_transaction_blocked(root->fs_info) || + cannot_commit)) + schedule_timeout(delay); + __set_current_state(TASK_RUNNING); } while (!kthread_should_stop()); return 0; } @@ -2272,9 +2278,11 @@ static void btrfs_init_dev_replace_locks(struct btrfs_fs_info *fs_info) fs_info->dev_replace.lock_owner = 0; atomic_set(&fs_info->dev_replace.nesting_level, 0); mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount); - mutex_init(&fs_info->dev_replace.lock_management_lock); - mutex_init(&fs_info->dev_replace.lock); + rwlock_init(&fs_info->dev_replace.lock); + atomic_set(&fs_info->dev_replace.read_locks, 0); + atomic_set(&fs_info->dev_replace.blocking_readers, 0); init_waitqueue_head(&fs_info->replace_wait); + init_waitqueue_head(&fs_info->dev_replace.read_lock_wq); } static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info) @@ -2385,7 +2393,7 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info, return -EIO; } - log_tree_root = btrfs_alloc_root(fs_info); + log_tree_root = btrfs_alloc_root(fs_info, GFP_KERNEL); if (!log_tree_root) return -ENOMEM; @@ -2510,8 +2518,8 @@ int open_ctree(struct super_block *sb, int backup_index = 0; int max_active; - tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info); - chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info); + tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info, GFP_KERNEL); + chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info, GFP_KERNEL); if (!tree_root || !chunk_root) { err = -ENOMEM; goto fail; @@ -2534,7 +2542,7 @@ int open_ctree(struct super_block *sb, err = ret; goto fail_bdi; } - fs_info->dirty_metadata_batch = PAGE_CACHE_SIZE * + fs_info->dirty_metadata_batch = PAGE_SIZE * (1 + ilog2(nr_cpu_ids)); ret = percpu_counter_init(&fs_info->delalloc_bytes, 0, GFP_KERNEL); @@ -2603,6 +2611,7 @@ int open_ctree(struct super_block *sb, atomic_set(&fs_info->nr_async_bios, 0); atomic_set(&fs_info->defrag_running, 0); atomic_set(&fs_info->qgroup_op_seq, 0); + atomic_set(&fs_info->reada_works_cnt, 0); atomic64_set(&fs_info->tree_mod_seq, 0); fs_info->sb = sb; fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE; @@ -2622,7 +2631,7 @@ int open_ctree(struct super_block *sb, INIT_LIST_HEAD(&fs_info->ordered_roots); spin_lock_init(&fs_info->ordered_root_lock); fs_info->delayed_root = kmalloc(sizeof(struct btrfs_delayed_root), - GFP_NOFS); + GFP_KERNEL); if (!fs_info->delayed_root) { err = -ENOMEM; goto fail_iput; @@ -2750,7 +2759,7 @@ int open_ctree(struct super_block *sb, */ fs_info->compress_type = BTRFS_COMPRESS_ZLIB; - ret = btrfs_parse_options(tree_root, options); + ret = btrfs_parse_options(tree_root, options, sb->s_flags); if (ret) { err = ret; goto fail_alloc; @@ -2778,7 +2787,7 @@ int open_ctree(struct super_block *sb, * flag our filesystem as having big metadata blocks if * they are bigger than the page size */ - if (btrfs_super_nodesize(disk_super) > PAGE_CACHE_SIZE) { + if (btrfs_super_nodesize(disk_super) > PAGE_SIZE) { if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA)) printk(KERN_INFO "BTRFS: flagging fs with big metadata feature\n"); features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA; @@ -2828,7 +2837,7 @@ int open_ctree(struct super_block *sb, fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, - SZ_4M / PAGE_CACHE_SIZE); + SZ_4M / PAGE_SIZE); tree_root->nodesize = nodesize; tree_root->sectorsize = sectorsize; @@ -3029,8 +3038,9 @@ retry_root_backup: if (ret) goto fail_trans_kthread; - /* do not make disk changes in broken FS */ - if (btrfs_super_log_root(disk_super) != 0) { + /* do not make disk changes in broken FS or nologreplay is given */ + if (btrfs_super_log_root(disk_super) != 0 && + !btrfs_test_opt(tree_root, NOLOGREPLAY)) { ret = btrfs_replay_log(fs_info, fs_devices); if (ret) { err = ret; @@ -3146,6 +3156,12 @@ retry_root_backup: fs_info->open = 1; + /* + * backuproot only affect mount behavior, and if open_ctree succeeded, + * no need to keep the flag + */ + btrfs_clear_opt(fs_info->mount_opt, USEBACKUPROOT); + return 0; fail_qgroup: @@ -3200,7 +3216,7 @@ fail: return err; recovery_tree_root: - if (!btrfs_test_opt(tree_root, RECOVERY)) + if (!btrfs_test_opt(tree_root, USEBACKUPROOT)) goto fail_tree_roots; free_root_pointers(fs_info, 0); @@ -4060,9 +4076,9 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, ret = -EINVAL; } /* Only PAGE SIZE is supported yet */ - if (sectorsize != PAGE_CACHE_SIZE) { + if (sectorsize != PAGE_SIZE) { printk(KERN_ERR "BTRFS: sectorsize %llu not supported yet, only support %lu\n", - sectorsize, PAGE_CACHE_SIZE); + sectorsize, PAGE_SIZE); ret = -EINVAL; } if (!is_power_of_2(nodesize) || nodesize < sectorsize || |