diff options
Diffstat (limited to 'fs/btrfs')
44 files changed, 1693 insertions, 1201 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index f6dac40f8..80e8472d6 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -148,8 +148,7 @@ int __init btrfs_prelim_ref_init(void) void btrfs_prelim_ref_exit(void) { - if (btrfs_prelim_ref_cache) - kmem_cache_destroy(btrfs_prelim_ref_cache); + kmem_cache_destroy(btrfs_prelim_ref_cache); } /* @@ -566,17 +565,14 @@ static void __merge_refs(struct list_head *head, int mode) struct __prelim_ref *pos2 = pos1, *tmp; list_for_each_entry_safe_continue(pos2, tmp, head, list) { - struct __prelim_ref *xchg, *ref1 = pos1, *ref2 = pos2; + struct __prelim_ref *ref1 = pos1, *ref2 = pos2; struct extent_inode_elem *eie; if (!ref_for_same_block(ref1, ref2)) continue; if (mode == 1) { - if (!ref1->parent && ref2->parent) { - xchg = ref1; - ref1 = ref2; - ref2 = xchg; - } + if (!ref1->parent && ref2->parent) + swap(ref1, ref2); } else { if (ref1->parent != ref2->parent) continue; diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 861d47256..516e19d1d 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -95,6 +95,7 @@ #include <linux/genhd.h> #include <linux/blkdev.h> #include <linux/vmalloc.h> +#include <linux/string.h> #include "ctree.h" #include "disk-io.h" #include "hash.h" @@ -105,6 +106,7 @@ #include "locking.h" #include "check-integrity.h" #include "rcu-string.h" +#include "compression.h" #define BTRFSIC_BLOCK_HASHTABLE_SIZE 0x10000 #define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE 0x10000 @@ -176,7 +178,7 @@ struct btrfsic_block { * Elements of this type are allocated dynamically and required because * each block object can refer to and can be ref from multiple blocks. * The key to lookup them in the hashtable is the dev_bytenr of - * the block ref to plus the one from the block refered from. + * the block ref to plus the one from the block referred from. 
* The fact that they are searchable via a hashtable and that a * ref_cnt is maintained is not required for the btrfs integrity * check algorithm itself, it is only used to make the output more @@ -755,7 +757,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state, BUG_ON(NULL == l); ret = btrfsic_read_block(state, &tmp_next_block_ctx); - if (ret < (int)PAGE_CACHE_SIZE) { + if (ret < (int)PAGE_SIZE) { printk(KERN_INFO "btrfsic: read @logical %llu failed!\n", tmp_next_block_ctx.start); @@ -1229,15 +1231,15 @@ static void btrfsic_read_from_block_data( size_t offset_in_page; char *kaddr; char *dst = (char *)dstv; - size_t start_offset = block_ctx->start & ((u64)PAGE_CACHE_SIZE - 1); - unsigned long i = (start_offset + offset) >> PAGE_CACHE_SHIFT; + size_t start_offset = block_ctx->start & ((u64)PAGE_SIZE - 1); + unsigned long i = (start_offset + offset) >> PAGE_SHIFT; WARN_ON(offset + len > block_ctx->len); - offset_in_page = (start_offset + offset) & (PAGE_CACHE_SIZE - 1); + offset_in_page = (start_offset + offset) & (PAGE_SIZE - 1); while (len > 0) { - cur = min(len, ((size_t)PAGE_CACHE_SIZE - offset_in_page)); - BUG_ON(i >= DIV_ROUND_UP(block_ctx->len, PAGE_CACHE_SIZE)); + cur = min(len, ((size_t)PAGE_SIZE - offset_in_page)); + BUG_ON(i >= DIV_ROUND_UP(block_ctx->len, PAGE_SIZE)); kaddr = block_ctx->datav[i]; memcpy(dst, kaddr + offset_in_page, cur); @@ -1603,8 +1605,8 @@ static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx) BUG_ON(!block_ctx->datav); BUG_ON(!block_ctx->pagev); - num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >> - PAGE_CACHE_SHIFT; + num_pages = (block_ctx->len + (u64)PAGE_SIZE - 1) >> + PAGE_SHIFT; while (num_pages > 0) { num_pages--; if (block_ctx->datav[num_pages]) { @@ -1635,15 +1637,15 @@ static int btrfsic_read_block(struct btrfsic_state *state, BUG_ON(block_ctx->datav); BUG_ON(block_ctx->pagev); BUG_ON(block_ctx->mem_to_free); - if (block_ctx->dev_bytenr & ((u64)PAGE_CACHE_SIZE - 1)) { + if 
(block_ctx->dev_bytenr & ((u64)PAGE_SIZE - 1)) { printk(KERN_INFO "btrfsic: read_block() with unaligned bytenr %llu\n", block_ctx->dev_bytenr); return -1; } - num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >> - PAGE_CACHE_SHIFT; + num_pages = (block_ctx->len + (u64)PAGE_SIZE - 1) >> + PAGE_SHIFT; block_ctx->mem_to_free = kzalloc((sizeof(*block_ctx->datav) + sizeof(*block_ctx->pagev)) * num_pages, GFP_NOFS); @@ -1674,8 +1676,8 @@ static int btrfsic_read_block(struct btrfsic_state *state, for (j = i; j < num_pages; j++) { ret = bio_add_page(bio, block_ctx->pagev[j], - PAGE_CACHE_SIZE, 0); - if (PAGE_CACHE_SIZE != ret) + PAGE_SIZE, 0); + if (PAGE_SIZE != ret) break; } if (j == i) { @@ -1691,7 +1693,7 @@ static int btrfsic_read_block(struct btrfsic_state *state, return -1; } bio_put(bio); - dev_bytenr += (j - i) * PAGE_CACHE_SIZE; + dev_bytenr += (j - i) * PAGE_SIZE; i = j; } for (i = 0; i < num_pages; i++) { @@ -1767,9 +1769,9 @@ static int btrfsic_test_for_metadata(struct btrfsic_state *state, u32 crc = ~(u32)0; unsigned int i; - if (num_pages * PAGE_CACHE_SIZE < state->metablock_size) + if (num_pages * PAGE_SIZE < state->metablock_size) return 1; /* not metadata */ - num_pages = state->metablock_size >> PAGE_CACHE_SHIFT; + num_pages = state->metablock_size >> PAGE_SHIFT; h = (struct btrfs_header *)datav[0]; if (memcmp(h->fsid, state->root->fs_info->fsid, BTRFS_UUID_SIZE)) @@ -1777,8 +1779,8 @@ static int btrfsic_test_for_metadata(struct btrfsic_state *state, for (i = 0; i < num_pages; i++) { u8 *data = i ? datav[i] : (datav[i] + BTRFS_CSUM_SIZE); - size_t sublen = i ? PAGE_CACHE_SIZE : - (PAGE_CACHE_SIZE - BTRFS_CSUM_SIZE); + size_t sublen = i ? 
PAGE_SIZE : + (PAGE_SIZE - BTRFS_CSUM_SIZE); crc = btrfs_crc32c(crc, data, sublen); } @@ -1824,14 +1826,14 @@ again: if (block->is_superblock) { bytenr = btrfs_super_bytenr((struct btrfs_super_block *) mapped_datav[0]); - if (num_pages * PAGE_CACHE_SIZE < + if (num_pages * PAGE_SIZE < BTRFS_SUPER_INFO_SIZE) { printk(KERN_INFO "btrfsic: cannot work with too short bios!\n"); return; } is_metadata = 1; - BUG_ON(BTRFS_SUPER_INFO_SIZE & (PAGE_CACHE_SIZE - 1)); + BUG_ON(BTRFS_SUPER_INFO_SIZE & (PAGE_SIZE - 1)); processed_len = BTRFS_SUPER_INFO_SIZE; if (state->print_mask & BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE) { @@ -1842,7 +1844,7 @@ again: } if (is_metadata) { if (!block->is_superblock) { - if (num_pages * PAGE_CACHE_SIZE < + if (num_pages * PAGE_SIZE < state->metablock_size) { printk(KERN_INFO "btrfsic: cannot work with too short bios!\n"); @@ -1878,7 +1880,7 @@ again: } block->logical_bytenr = bytenr; } else { - if (num_pages * PAGE_CACHE_SIZE < + if (num_pages * PAGE_SIZE < state->datablock_size) { printk(KERN_INFO "btrfsic: cannot work with too short bios!\n"); @@ -2011,7 +2013,7 @@ again: block->logical_bytenr = bytenr; block->is_metadata = 1; if (block->is_superblock) { - BUG_ON(PAGE_CACHE_SIZE != + BUG_ON(PAGE_SIZE != BTRFS_SUPER_INFO_SIZE); ret = btrfsic_process_written_superblock( state, @@ -2170,8 +2172,8 @@ again: continue_loop: BUG_ON(!processed_len); dev_bytenr += processed_len; - mapped_datav += processed_len >> PAGE_CACHE_SHIFT; - num_pages -= processed_len >> PAGE_CACHE_SHIFT; + mapped_datav += processed_len >> PAGE_SHIFT; + num_pages -= processed_len >> PAGE_SHIFT; goto again; } @@ -2952,7 +2954,7 @@ static void __btrfsic_submit_bio(int rw, struct bio *bio) goto leave; cur_bytenr = dev_bytenr; for (i = 0; i < bio->bi_vcnt; i++) { - BUG_ON(bio->bi_io_vec[i].bv_len != PAGE_CACHE_SIZE); + BUG_ON(bio->bi_io_vec[i].bv_len != PAGE_SIZE); mapped_datav[i] = kmap(bio->bi_io_vec[i].bv_page); if (!mapped_datav[i]) { while (i > 0) { @@ -3035,16 +3037,16 @@ int 
btrfsic_mount(struct btrfs_root *root, struct list_head *dev_head = &fs_devices->devices; struct btrfs_device *device; - if (root->nodesize & ((u64)PAGE_CACHE_SIZE - 1)) { + if (root->nodesize & ((u64)PAGE_SIZE - 1)) { printk(KERN_INFO - "btrfsic: cannot handle nodesize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", - root->nodesize, PAGE_CACHE_SIZE); + "btrfsic: cannot handle nodesize %d not being a multiple of PAGE_SIZE %ld!\n", + root->nodesize, PAGE_SIZE); return -1; } - if (root->sectorsize & ((u64)PAGE_CACHE_SIZE - 1)) { + if (root->sectorsize & ((u64)PAGE_SIZE - 1)) { printk(KERN_INFO - "btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", - root->sectorsize, PAGE_CACHE_SIZE); + "btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_SIZE %ld!\n", + root->sectorsize, PAGE_SIZE); return -1; } state = kzalloc(sizeof(*state), GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); @@ -3076,7 +3078,7 @@ int btrfsic_mount(struct btrfs_root *root, list_for_each_entry(device, dev_head, dev_list) { struct btrfsic_dev_state *ds; - char *p; + const char *p; if (!device->bdev || !device->name) continue; @@ -3092,11 +3094,7 @@ int btrfsic_mount(struct btrfs_root *root, ds->state = state; bdevname(ds->bdev, ds->name); ds->name[BDEVNAME_SIZE - 1] = '\0'; - for (p = ds->name; *p != '\0'; p++); - while (p > ds->name && *p != '/') - p--; - if (*p == '/') - p++; + p = kbasename(ds->name); strlcpy(ds->name, p, sizeof(ds->name)); btrfsic_dev_state_hashtable_add(ds, &btrfsic_dev_state_hashtable); diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 3346cd8f9..ff61a41ac 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -119,7 +119,7 @@ static int check_compressed_csum(struct inode *inode, csum = ~(u32)0; kaddr = kmap_atomic(page); - csum = btrfs_csum_data(kaddr, csum, PAGE_CACHE_SIZE); + csum = btrfs_csum_data(kaddr, csum, PAGE_SIZE); btrfs_csum_final(csum, (char *)&csum); kunmap_atomic(kaddr); @@ -190,7 +190,7 
@@ csum_failed: for (index = 0; index < cb->nr_pages; index++) { page = cb->compressed_pages[index]; page->mapping = NULL; - page_cache_release(page); + put_page(page); } /* do io completion on the original bio */ @@ -224,8 +224,8 @@ out: static noinline void end_compressed_writeback(struct inode *inode, const struct compressed_bio *cb) { - unsigned long index = cb->start >> PAGE_CACHE_SHIFT; - unsigned long end_index = (cb->start + cb->len - 1) >> PAGE_CACHE_SHIFT; + unsigned long index = cb->start >> PAGE_SHIFT; + unsigned long end_index = (cb->start + cb->len - 1) >> PAGE_SHIFT; struct page *pages[16]; unsigned long nr_pages = end_index - index + 1; int i; @@ -247,7 +247,7 @@ static noinline void end_compressed_writeback(struct inode *inode, if (cb->errors) SetPageError(pages[i]); end_page_writeback(pages[i]); - page_cache_release(pages[i]); + put_page(pages[i]); } nr_pages -= ret; index += ret; @@ -304,7 +304,7 @@ static void end_compressed_bio_write(struct bio *bio) for (index = 0; index < cb->nr_pages; index++) { page = cb->compressed_pages[index]; page->mapping = NULL; - page_cache_release(page); + put_page(page); } /* finally free the cb struct */ @@ -341,7 +341,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, int ret; int skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; - WARN_ON(start & ((u64)PAGE_CACHE_SIZE - 1)); + WARN_ON(start & ((u64)PAGE_SIZE - 1)); cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS); if (!cb) return -ENOMEM; @@ -374,14 +374,14 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, page->mapping = inode->i_mapping; if (bio->bi_iter.bi_size) ret = io_tree->ops->merge_bio_hook(WRITE, page, 0, - PAGE_CACHE_SIZE, + PAGE_SIZE, bio, 0); else ret = 0; page->mapping = NULL; - if (ret || bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < - PAGE_CACHE_SIZE) { + if (ret || bio_add_page(bio, page, PAGE_SIZE, 0) < + PAGE_SIZE) { bio_get(bio); /* @@ -410,15 +410,15 @@ int 
btrfs_submit_compressed_write(struct inode *inode, u64 start, BUG_ON(!bio); bio->bi_private = cb; bio->bi_end_io = end_compressed_bio_write; - bio_add_page(bio, page, PAGE_CACHE_SIZE, 0); + bio_add_page(bio, page, PAGE_SIZE, 0); } - if (bytes_left < PAGE_CACHE_SIZE) { + if (bytes_left < PAGE_SIZE) { btrfs_info(BTRFS_I(inode)->root->fs_info, "bytes left %lu compress len %lu nr %lu", bytes_left, cb->compressed_len, cb->nr_pages); } - bytes_left -= PAGE_CACHE_SIZE; - first_byte += PAGE_CACHE_SIZE; + bytes_left -= PAGE_SIZE; + first_byte += PAGE_SIZE; cond_resched(); } bio_get(bio); @@ -457,17 +457,17 @@ static noinline int add_ra_bio_pages(struct inode *inode, int misses = 0; page = cb->orig_bio->bi_io_vec[cb->orig_bio->bi_vcnt - 1].bv_page; - last_offset = (page_offset(page) + PAGE_CACHE_SIZE); + last_offset = (page_offset(page) + PAGE_SIZE); em_tree = &BTRFS_I(inode)->extent_tree; tree = &BTRFS_I(inode)->io_tree; if (isize == 0) return 0; - end_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; + end_index = (i_size_read(inode) - 1) >> PAGE_SHIFT; while (last_offset < compressed_end) { - pg_index = last_offset >> PAGE_CACHE_SHIFT; + pg_index = last_offset >> PAGE_SHIFT; if (pg_index > end_index) break; @@ -488,11 +488,11 @@ static noinline int add_ra_bio_pages(struct inode *inode, break; if (add_to_page_cache_lru(page, mapping, pg_index, GFP_NOFS)) { - page_cache_release(page); + put_page(page); goto next; } - end = last_offset + PAGE_CACHE_SIZE - 1; + end = last_offset + PAGE_SIZE - 1; /* * at this point, we have a locked page in the page cache * for these bytes in the file. 
But, we have to make @@ -502,27 +502,27 @@ static noinline int add_ra_bio_pages(struct inode *inode, lock_extent(tree, last_offset, end); read_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, last_offset, - PAGE_CACHE_SIZE); + PAGE_SIZE); read_unlock(&em_tree->lock); if (!em || last_offset < em->start || - (last_offset + PAGE_CACHE_SIZE > extent_map_end(em)) || + (last_offset + PAGE_SIZE > extent_map_end(em)) || (em->block_start >> 9) != cb->orig_bio->bi_iter.bi_sector) { free_extent_map(em); unlock_extent(tree, last_offset, end); unlock_page(page); - page_cache_release(page); + put_page(page); break; } free_extent_map(em); if (page->index == end_index) { char *userpage; - size_t zero_offset = isize & (PAGE_CACHE_SIZE - 1); + size_t zero_offset = isize & (PAGE_SIZE - 1); if (zero_offset) { int zeros; - zeros = PAGE_CACHE_SIZE - zero_offset; + zeros = PAGE_SIZE - zero_offset; userpage = kmap_atomic(page); memset(userpage + zero_offset, 0, zeros); flush_dcache_page(page); @@ -531,19 +531,19 @@ static noinline int add_ra_bio_pages(struct inode *inode, } ret = bio_add_page(cb->orig_bio, page, - PAGE_CACHE_SIZE, 0); + PAGE_SIZE, 0); - if (ret == PAGE_CACHE_SIZE) { + if (ret == PAGE_SIZE) { nr_pages++; - page_cache_release(page); + put_page(page); } else { unlock_extent(tree, last_offset, end); unlock_page(page); - page_cache_release(page); + put_page(page); break; } next: - last_offset += PAGE_CACHE_SIZE; + last_offset += PAGE_SIZE; } return 0; } @@ -567,7 +567,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, struct extent_map_tree *em_tree; struct compressed_bio *cb; struct btrfs_root *root = BTRFS_I(inode)->root; - unsigned long uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE; + unsigned long uncompressed_len = bio->bi_vcnt * PAGE_SIZE; unsigned long compressed_len; unsigned long nr_pages; unsigned long pg_index; @@ -589,7 +589,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, read_lock(&em_tree->lock); 
em = lookup_extent_mapping(em_tree, page_offset(bio->bi_io_vec->bv_page), - PAGE_CACHE_SIZE); + PAGE_SIZE); read_unlock(&em_tree->lock); if (!em) return -EIO; @@ -617,7 +617,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, cb->compress_type = extent_compress_type(bio_flags); cb->orig_bio = bio; - nr_pages = DIV_ROUND_UP(compressed_len, PAGE_CACHE_SIZE); + nr_pages = DIV_ROUND_UP(compressed_len, PAGE_SIZE); cb->compressed_pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS); if (!cb->compressed_pages) @@ -640,7 +640,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, add_ra_bio_pages(inode, em_start + em_len, cb); /* include any pages we added in add_ra-bio_pages */ - uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE; + uncompressed_len = bio->bi_vcnt * PAGE_SIZE; cb->len = uncompressed_len; comp_bio = compressed_bio_alloc(bdev, cur_disk_byte, GFP_NOFS); @@ -653,18 +653,18 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, for (pg_index = 0; pg_index < nr_pages; pg_index++) { page = cb->compressed_pages[pg_index]; page->mapping = inode->i_mapping; - page->index = em_start >> PAGE_CACHE_SHIFT; + page->index = em_start >> PAGE_SHIFT; if (comp_bio->bi_iter.bi_size) ret = tree->ops->merge_bio_hook(READ, page, 0, - PAGE_CACHE_SIZE, + PAGE_SIZE, comp_bio, 0); else ret = 0; page->mapping = NULL; - if (ret || bio_add_page(comp_bio, page, PAGE_CACHE_SIZE, 0) < - PAGE_CACHE_SIZE) { + if (ret || bio_add_page(comp_bio, page, PAGE_SIZE, 0) < + PAGE_SIZE) { bio_get(comp_bio); ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, @@ -702,9 +702,9 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, comp_bio->bi_private = cb; comp_bio->bi_end_io = end_compressed_bio_read; - bio_add_page(comp_bio, page, PAGE_CACHE_SIZE, 0); + bio_add_page(comp_bio, page, PAGE_SIZE, 0); } - cur_disk_byte += PAGE_CACHE_SIZE; + cur_disk_byte += PAGE_SIZE; } bio_get(comp_bio); @@ -1013,8 +1013,8 @@ int 
btrfs_decompress_buf2page(char *buf, unsigned long buf_start, /* copy bytes from the working buffer into the pages */ while (working_bytes > 0) { - bytes = min(PAGE_CACHE_SIZE - *pg_offset, - PAGE_CACHE_SIZE - buf_offset); + bytes = min(PAGE_SIZE - *pg_offset, + PAGE_SIZE - buf_offset); bytes = min(bytes, working_bytes); kaddr = kmap_atomic(page_out); memcpy(kaddr + *pg_offset, buf + buf_offset, bytes); @@ -1027,7 +1027,7 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start, current_buf_start += bytes; /* check if we need to pick another page */ - if (*pg_offset == PAGE_CACHE_SIZE) { + if (*pg_offset == PAGE_SIZE) { (*pg_index)++; if (*pg_index >= vcnt) return 0; diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index 13a4dc043..f49d8b8c0 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h @@ -48,6 +48,15 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, void btrfs_clear_biovec_end(struct bio_vec *bvec, int vcnt, unsigned long pg_index, unsigned long pg_offset); + +enum btrfs_compression_type { + BTRFS_COMPRESS_NONE = 0, + BTRFS_COMPRESS_ZLIB = 1, + BTRFS_COMPRESS_LZO = 2, + BTRFS_COMPRESS_TYPES = 2, + BTRFS_COMPRESS_LAST = 3, +}; + struct btrfs_compress_op { struct list_head *(*alloc_workspace)(void); diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 769e0ff1b..ec7928a27 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -19,6 +19,7 @@ #include <linux/sched.h> #include <linux/slab.h> #include <linux/rbtree.h> +#include <linux/vmalloc.h> #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -311,7 +312,7 @@ struct tree_mod_root { struct tree_mod_elem { struct rb_node node; - u64 index; /* shifted logical */ + u64 logical; u64 seq; enum mod_log_op op; @@ -435,11 +436,11 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, /* * key order of the log: - * index -> sequence + * node/leaf start address -> sequence * - * the index is the shifted logical of the *new* root node 
for root replace - * operations, or the shifted logical of the affected block for all other - * operations. + * The 'start address' is the logical address of the *new* root node + * for root replace operations, or the logical address of the affected + * block for all other operations. * * Note: must be called with write lock (tree_mod_log_write_lock). */ @@ -460,9 +461,9 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm) while (*new) { cur = container_of(*new, struct tree_mod_elem, node); parent = *new; - if (cur->index < tm->index) + if (cur->logical < tm->logical) new = &((*new)->rb_left); - else if (cur->index > tm->index) + else if (cur->logical > tm->logical) new = &((*new)->rb_right); else if (cur->seq < tm->seq) new = &((*new)->rb_left); @@ -523,7 +524,7 @@ alloc_tree_mod_elem(struct extent_buffer *eb, int slot, if (!tm) return NULL; - tm->index = eb->start >> PAGE_CACHE_SHIFT; + tm->logical = eb->start; if (op != MOD_LOG_KEY_ADD) { btrfs_node_key(eb, &tm->key, slot); tm->blockptr = btrfs_node_blockptr(eb, slot); @@ -588,7 +589,7 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info, goto free_tms; } - tm->index = eb->start >> PAGE_CACHE_SHIFT; + tm->logical = eb->start; tm->slot = src_slot; tm->move.dst_slot = dst_slot; tm->move.nr_items = nr_items; @@ -699,7 +700,7 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, goto free_tms; } - tm->index = new_root->start >> PAGE_CACHE_SHIFT; + tm->logical = new_root->start; tm->old_root.logical = old_root->start; tm->old_root.level = btrfs_header_level(old_root); tm->generation = btrfs_header_generation(old_root); @@ -739,16 +740,15 @@ __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq, struct rb_node *node; struct tree_mod_elem *cur = NULL; struct tree_mod_elem *found = NULL; - u64 index = start >> PAGE_CACHE_SHIFT; tree_mod_log_read_lock(fs_info); tm_root = &fs_info->tree_mod_log; node = tm_root->rb_node; while (node) { cur = container_of(node, 
struct tree_mod_elem, node); - if (cur->index < index) { + if (cur->logical < start) { node = node->rb_left; - } else if (cur->index > index) { + } else if (cur->logical > start) { node = node->rb_right; } else if (cur->seq < min_seq) { node = node->rb_left; @@ -1230,9 +1230,10 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info, return NULL; /* - * the very last operation that's logged for a root is the replacement - * operation (if it is replaced at all). this has the index of the *new* - * root, making it the very first operation that's logged for this root. + * the very last operation that's logged for a root is the + * replacement operation (if it is replaced at all). this has + * the logical address of the *new* root, making it the very + * first operation that's logged for this root. */ while (1) { tm = tree_mod_log_search_oldest(fs_info, root_logical, @@ -1336,7 +1337,7 @@ __tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, if (!next) break; tm = container_of(next, struct tree_mod_elem, node); - if (tm->index != first_tm->index) + if (tm->logical != first_tm->logical) break; } tree_mod_log_read_unlock(fs_info); @@ -5361,10 +5362,13 @@ int btrfs_compare_trees(struct btrfs_root *left_root, goto out; } - tmp_buf = kmalloc(left_root->nodesize, GFP_NOFS); + tmp_buf = kmalloc(left_root->nodesize, GFP_KERNEL | __GFP_NOWARN); if (!tmp_buf) { - ret = -ENOMEM; - goto out; + tmp_buf = vmalloc(left_root->nodesize); + if (!tmp_buf) { + ret = -ENOMEM; + goto out; + } } left_path->search_commit_root = 1; @@ -5565,7 +5569,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root, out: btrfs_free_path(left_path); btrfs_free_path(right_path); - kfree(tmp_buf); + kvfree(tmp_buf); return ret; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index bfe4a337f..208d19938 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -100,6 +100,9 @@ struct btrfs_ordered_sum; /* tracks free space in block groups. 
*/ #define BTRFS_FREE_SPACE_TREE_OBJECTID 10ULL +/* device stats in the device tree */ +#define BTRFS_DEV_STATS_OBJECTID 0ULL + /* for storing balance parameters in the root tree */ #define BTRFS_BALANCE_OBJECTID -4ULL @@ -715,14 +718,6 @@ struct btrfs_timespec { __le32 nsec; } __attribute__ ((__packed__)); -enum btrfs_compression_type { - BTRFS_COMPRESS_NONE = 0, - BTRFS_COMPRESS_ZLIB = 1, - BTRFS_COMPRESS_LZO = 2, - BTRFS_COMPRESS_TYPES = 2, - BTRFS_COMPRESS_LAST = 3, -}; - struct btrfs_inode_item { /* nfs style generation number */ __le64 generation; @@ -793,7 +788,7 @@ struct btrfs_root_item { /* * This generation number is used to test if the new fields are valid - * and up to date while reading the root item. Everytime the root item + * and up to date while reading the root item. Every time the root item * is written out, the "generation" field is copied into this field. If * anyone ever mounted the fs with an older kernel, we will have * mismatching generation values here and thus must invalidate the @@ -1002,8 +997,10 @@ struct btrfs_dev_replace { pid_t lock_owner; atomic_t nesting_level; struct mutex lock_finishing_cancel_unmount; - struct mutex lock_management_lock; - struct mutex lock; + rwlock_t lock; + atomic_t read_locks; + atomic_t blocking_readers; + wait_queue_head_t read_lock_wq; struct btrfs_scrub_progress scrub_progress; }; @@ -1222,10 +1219,10 @@ struct btrfs_space_info { * we've called update_block_group and dropped the bytes_used counter * and increased the bytes_pinned counter. However this means that * bytes_pinned does not reflect the bytes that will be pinned once the - * delayed refs are flushed, so this counter is inc'ed everytime we call - * btrfs_free_extent so it is a realtime count of what will be freed - * once the transaction is committed. It will be zero'ed everytime the - * transaction commits. 
+ * delayed refs are flushed, so this counter is inc'ed every time we + * call btrfs_free_extent so it is a realtime count of what will be + * freed once the transaction is committed. It will be zero'ed every + * time the transaction commits. */ struct percpu_counter total_bytes_pinned; @@ -1822,6 +1819,9 @@ struct btrfs_fs_info { spinlock_t reada_lock; struct radix_tree_root reada_tree; + /* readahead works cnt */ + atomic_t reada_works_cnt; + /* Extent buffer radix tree */ spinlock_t buffer_lock; struct radix_tree_root buffer_radix; @@ -2185,13 +2185,43 @@ struct btrfs_ioctl_defrag_range_args { */ #define BTRFS_QGROUP_RELATION_KEY 246 +/* + * Obsolete name, see BTRFS_TEMPORARY_ITEM_KEY. + */ #define BTRFS_BALANCE_ITEM_KEY 248 /* - * Persistantly stores the io stats in the device tree. - * One key for all stats, (0, BTRFS_DEV_STATS_KEY, devid). + * The key type for tree items that are stored persistently, but do not need to + * exist for extended period of time. The items can exist in any tree. + * + * [subtype, BTRFS_TEMPORARY_ITEM_KEY, data] + * + * Existing items: + * + * - balance status item + * (BTRFS_BALANCE_OBJECTID, BTRFS_TEMPORARY_ITEM_KEY, 0) */ -#define BTRFS_DEV_STATS_KEY 249 +#define BTRFS_TEMPORARY_ITEM_KEY 248 + +/* + * Obsolete name, see BTRFS_PERSISTENT_ITEM_KEY + */ +#define BTRFS_DEV_STATS_KEY 249 + +/* + * The key type for tree items that are stored persistently and usually exist + * for a long period, eg. filesystem lifetime. The item kinds can be status + * information, stats or preference values. The item can exist in any tree. + * + * [subtype, BTRFS_PERSISTENT_ITEM_KEY, data] + * + * Existing items: + * + * - device statistics, store IO stats in the device tree, one key for all + * stats + * (BTRFS_DEV_STATS_OBJECTID, BTRFS_DEV_STATS_KEY, 0) + */ +#define BTRFS_PERSISTENT_ITEM_KEY 249 /* * Persistantly stores the device replace state in the device tree. 
@@ -2241,7 +2271,7 @@ struct btrfs_ioctl_defrag_range_args { #define BTRFS_MOUNT_ENOSPC_DEBUG (1 << 15) #define BTRFS_MOUNT_AUTO_DEFRAG (1 << 16) #define BTRFS_MOUNT_INODE_MAP_CACHE (1 << 17) -#define BTRFS_MOUNT_RECOVERY (1 << 18) +#define BTRFS_MOUNT_USEBACKUPROOT (1 << 18) #define BTRFS_MOUNT_SKIP_BALANCE (1 << 19) #define BTRFS_MOUNT_CHECK_INTEGRITY (1 << 20) #define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21) @@ -2250,9 +2280,10 @@ struct btrfs_ioctl_defrag_range_args { #define BTRFS_MOUNT_FRAGMENT_DATA (1 << 24) #define BTRFS_MOUNT_FRAGMENT_METADATA (1 << 25) #define BTRFS_MOUNT_FREE_SPACE_TREE (1 << 26) +#define BTRFS_MOUNT_NOLOGREPLAY (1 << 27) #define BTRFS_DEFAULT_COMMIT_INTERVAL (30) -#define BTRFS_DEFAULT_MAX_INLINE (8192) +#define BTRFS_DEFAULT_MAX_INLINE (2048) #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) @@ -2353,6 +2384,9 @@ struct btrfs_map_token { unsigned long offset; }; +#define BTRFS_BYTES_TO_BLKS(fs_info, bytes) \ + ((bytes) >> (fs_info)->sb->s_blocksize_bits) + static inline void btrfs_init_map_token (struct btrfs_map_token *token) { token->kaddr = NULL; @@ -3448,8 +3482,7 @@ u64 btrfs_csum_bytes_to_leaves(struct btrfs_root *root, u64 csum_bytes); static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root, unsigned num_items) { - return (root->nodesize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) * - 2 * num_items; + return root->nodesize * BTRFS_MAX_LEVEL * 2 * num_items; } /* @@ -4027,7 +4060,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *dir, u64 objectid, const char *name, int name_len); -int btrfs_truncate_page(struct inode *inode, loff_t from, loff_t len, +int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len, int front); int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -4089,6 +4122,8 @@ void btrfs_test_inode_set_ops(struct 
inode *inode); /* ioctl.c */ long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); +long btrfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg); +int btrfs_ioctl_get_supported_features(void __user *arg); void btrfs_update_iflags(struct inode *inode); void btrfs_inherit_iflags(struct inode *inode, struct inode *dir); int btrfs_is_empty_uuid(u8 *uuid); @@ -4151,7 +4186,8 @@ void btrfs_sysfs_remove_mounted(struct btrfs_fs_info *fs_info); ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size); /* super.c */ -int btrfs_parse_options(struct btrfs_root *root, char *options); +int btrfs_parse_options(struct btrfs_root *root, char *options, + unsigned long new_flags); int btrfs_sync_fs(struct super_block *sb, int wait); #ifdef CONFIG_PRINTK @@ -4525,8 +4561,8 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root, struct btrfs_key *start, struct btrfs_key *end); int btrfs_reada_wait(void *handle); void btrfs_reada_detach(void *handle); -int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, - u64 start, int err); +int btree_readahead_hook(struct btrfs_fs_info *fs_info, + struct extent_buffer *eb, u64 start, int err); static inline int is_fstree(u64 rootid) { diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index b57daa895..6cef0062f 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -43,8 +43,7 @@ int __init btrfs_delayed_inode_init(void) void btrfs_delayed_inode_exit(void) { - if (delayed_node_cache) - kmem_cache_destroy(delayed_node_cache); + kmem_cache_destroy(delayed_node_cache); } static inline void btrfs_init_delayed_node( @@ -651,9 +650,14 @@ static int btrfs_delayed_inode_reserve_metadata( goto out; ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes); - if (!WARN_ON(ret)) + if (!ret) goto out; + if (btrfs_test_opt(root, ENOSPC_DEBUG)) { + btrfs_debug(root->fs_info, + "block rsv migrate returned %d", ret); + WARN_ON(1); + } /* * Ok 
this is a problem, let's just steal from the global rsv * since this really shouldn't happen that often. diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 914ac13bd..430b3689b 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -929,14 +929,10 @@ btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr) void btrfs_delayed_ref_exit(void) { - if (btrfs_delayed_ref_head_cachep) - kmem_cache_destroy(btrfs_delayed_ref_head_cachep); - if (btrfs_delayed_tree_ref_cachep) - kmem_cache_destroy(btrfs_delayed_tree_ref_cachep); - if (btrfs_delayed_data_ref_cachep) - kmem_cache_destroy(btrfs_delayed_data_ref_cachep); - if (btrfs_delayed_extent_op_cachep) - kmem_cache_destroy(btrfs_delayed_extent_op_cachep); + kmem_cache_destroy(btrfs_delayed_ref_head_cachep); + kmem_cache_destroy(btrfs_delayed_tree_ref_cachep); + kmem_cache_destroy(btrfs_delayed_data_ref_cachep); + kmem_cache_destroy(btrfs_delayed_extent_op_cachep); } int btrfs_delayed_ref_init(void) diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index cbb7dbfb3..26bcb487f 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -202,13 +202,13 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans, struct btrfs_dev_replace_item *ptr; struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; - btrfs_dev_replace_lock(dev_replace); + btrfs_dev_replace_lock(dev_replace, 0); if (!dev_replace->is_valid || !dev_replace->item_needs_writeback) { - btrfs_dev_replace_unlock(dev_replace); + btrfs_dev_replace_unlock(dev_replace, 0); return 0; } - btrfs_dev_replace_unlock(dev_replace); + btrfs_dev_replace_unlock(dev_replace, 0); key.objectid = 0; key.type = BTRFS_DEV_REPLACE_KEY; @@ -264,7 +264,7 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans, ptr = btrfs_item_ptr(eb, path->slots[0], struct btrfs_dev_replace_item); - btrfs_dev_replace_lock(dev_replace); + btrfs_dev_replace_lock(dev_replace, 1); if (dev_replace->srcdev) 
btrfs_set_dev_replace_src_devid(eb, ptr, dev_replace->srcdev->devid); @@ -287,7 +287,7 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans, btrfs_set_dev_replace_cursor_right(eb, ptr, dev_replace->cursor_right); dev_replace->item_needs_writeback = 0; - btrfs_dev_replace_unlock(dev_replace); + btrfs_dev_replace_unlock(dev_replace, 1); btrfs_mark_buffer_dirty(eb); @@ -356,7 +356,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root, return PTR_ERR(trans); } - btrfs_dev_replace_lock(dev_replace); + btrfs_dev_replace_lock(dev_replace, 1); switch (dev_replace->replace_state) { case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED: case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED: @@ -394,8 +394,10 @@ int btrfs_dev_replace_start(struct btrfs_root *root, dev_replace->cursor_right = 0; dev_replace->is_valid = 1; dev_replace->item_needs_writeback = 1; + atomic64_set(&dev_replace->num_write_errors, 0); + atomic64_set(&dev_replace->num_uncorrectable_read_errors, 0); args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR; - btrfs_dev_replace_unlock(dev_replace); + btrfs_dev_replace_unlock(dev_replace, 1); ret = btrfs_sysfs_add_device_link(tgt_device->fs_devices, tgt_device); if (ret) @@ -407,7 +409,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root, trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { ret = PTR_ERR(trans); - btrfs_dev_replace_lock(dev_replace); + btrfs_dev_replace_lock(dev_replace, 1); goto leave; } @@ -433,7 +435,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root, leave: dev_replace->srcdev = NULL; dev_replace->tgtdev = NULL; - btrfs_dev_replace_unlock(dev_replace); + btrfs_dev_replace_unlock(dev_replace, 1); btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device); return ret; } @@ -471,18 +473,18 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, /* don't allow cancel or unmount to disturb the finishing procedure */ mutex_lock(&dev_replace->lock_finishing_cancel_unmount); - btrfs_dev_replace_lock(dev_replace); + 
btrfs_dev_replace_lock(dev_replace, 0); /* was the operation canceled, or is it finished? */ if (dev_replace->replace_state != BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED) { - btrfs_dev_replace_unlock(dev_replace); + btrfs_dev_replace_unlock(dev_replace, 0); mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); return 0; } tgt_device = dev_replace->tgtdev; src_device = dev_replace->srcdev; - btrfs_dev_replace_unlock(dev_replace); + btrfs_dev_replace_unlock(dev_replace, 0); /* * flush all outstanding I/O and inode extent mappings before the @@ -507,7 +509,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, /* keep away write_all_supers() during the finishing procedure */ mutex_lock(&root->fs_info->fs_devices->device_list_mutex); mutex_lock(&root->fs_info->chunk_mutex); - btrfs_dev_replace_lock(dev_replace); + btrfs_dev_replace_lock(dev_replace, 1); dev_replace->replace_state = scrub_ret ? BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED : BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED; @@ -528,7 +530,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, rcu_str_deref(src_device->name), src_device->devid, rcu_str_deref(tgt_device->name), scrub_ret); - btrfs_dev_replace_unlock(dev_replace); + btrfs_dev_replace_unlock(dev_replace, 1); mutex_unlock(&root->fs_info->chunk_mutex); mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); mutex_unlock(&uuid_mutex); @@ -565,7 +567,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list); fs_info->fs_devices->rw_devices++; - btrfs_dev_replace_unlock(dev_replace); + btrfs_dev_replace_unlock(dev_replace, 1); btrfs_rm_dev_replace_blocked(fs_info); @@ -649,7 +651,7 @@ void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info, struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; struct btrfs_device *srcdev; - btrfs_dev_replace_lock(dev_replace); + btrfs_dev_replace_lock(dev_replace, 0); /* even if 
!dev_replace_is_valid, the values are good enough for * the replace_status ioctl */ args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR; @@ -675,7 +677,7 @@ void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info, div_u64(btrfs_device_get_total_bytes(srcdev), 1000)); break; } - btrfs_dev_replace_unlock(dev_replace); + btrfs_dev_replace_unlock(dev_replace, 0); } int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info, @@ -698,13 +700,13 @@ static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info) return -EROFS; mutex_lock(&dev_replace->lock_finishing_cancel_unmount); - btrfs_dev_replace_lock(dev_replace); + btrfs_dev_replace_lock(dev_replace, 1); switch (dev_replace->replace_state) { case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED: case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED: case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED: result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED; - btrfs_dev_replace_unlock(dev_replace); + btrfs_dev_replace_unlock(dev_replace, 1); goto leave; case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED: @@ -717,7 +719,7 @@ static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info) dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED; dev_replace->time_stopped = get_seconds(); dev_replace->item_needs_writeback = 1; - btrfs_dev_replace_unlock(dev_replace); + btrfs_dev_replace_unlock(dev_replace, 1); btrfs_scrub_cancel(fs_info); trans = btrfs_start_transaction(root, 0); @@ -740,7 +742,7 @@ void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info) struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; mutex_lock(&dev_replace->lock_finishing_cancel_unmount); - btrfs_dev_replace_lock(dev_replace); + btrfs_dev_replace_lock(dev_replace, 1); switch (dev_replace->replace_state) { case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED: case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED: @@ -756,7 +758,7 @@ void btrfs_dev_replace_suspend_for_unmount(struct 
btrfs_fs_info *fs_info) break; } - btrfs_dev_replace_unlock(dev_replace); + btrfs_dev_replace_unlock(dev_replace, 1); mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); } @@ -766,12 +768,12 @@ int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info) struct task_struct *task; struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; - btrfs_dev_replace_lock(dev_replace); + btrfs_dev_replace_lock(dev_replace, 1); switch (dev_replace->replace_state) { case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED: case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED: case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED: - btrfs_dev_replace_unlock(dev_replace); + btrfs_dev_replace_unlock(dev_replace, 1); return 0; case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: break; @@ -784,10 +786,10 @@ int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info) btrfs_info(fs_info, "cannot continue dev_replace, tgtdev is missing"); btrfs_info(fs_info, "you may cancel the operation after 'mount -o degraded'"); - btrfs_dev_replace_unlock(dev_replace); + btrfs_dev_replace_unlock(dev_replace, 1); return 0; } - btrfs_dev_replace_unlock(dev_replace); + btrfs_dev_replace_unlock(dev_replace, 1); WARN_ON(atomic_xchg( &fs_info->mutually_exclusive_operation_running, 1)); @@ -802,7 +804,7 @@ static int btrfs_dev_replace_kthread(void *data) struct btrfs_ioctl_dev_replace_args *status_args; u64 progress; - status_args = kzalloc(sizeof(*status_args), GFP_NOFS); + status_args = kzalloc(sizeof(*status_args), GFP_KERNEL); if (status_args) { btrfs_dev_replace_status(fs_info, status_args); progress = status_args->status.progress_1000; @@ -858,55 +860,65 @@ int btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace) * not called and the the filesystem is remounted * in degraded state. This does not stop the * dev_replace procedure. It needs to be canceled - * manually if the cancelation is wanted. + * manually if the cancellation is wanted. 
*/ break; } return 1; } -void btrfs_dev_replace_lock(struct btrfs_dev_replace *dev_replace) +void btrfs_dev_replace_lock(struct btrfs_dev_replace *dev_replace, int rw) { - /* the beginning is just an optimization for the typical case */ - if (atomic_read(&dev_replace->nesting_level) == 0) { -acquire_lock: - /* this is not a nested case where the same thread - * is trying to acqurire the same lock twice */ - mutex_lock(&dev_replace->lock); - mutex_lock(&dev_replace->lock_management_lock); - dev_replace->lock_owner = current->pid; - atomic_inc(&dev_replace->nesting_level); - mutex_unlock(&dev_replace->lock_management_lock); - return; + if (rw == 1) { + /* write */ +again: + wait_event(dev_replace->read_lock_wq, + atomic_read(&dev_replace->blocking_readers) == 0); + write_lock(&dev_replace->lock); + if (atomic_read(&dev_replace->blocking_readers)) { + write_unlock(&dev_replace->lock); + goto again; + } + } else { + read_lock(&dev_replace->lock); + atomic_inc(&dev_replace->read_locks); } +} - mutex_lock(&dev_replace->lock_management_lock); - if (atomic_read(&dev_replace->nesting_level) > 0 && - dev_replace->lock_owner == current->pid) { - WARN_ON(!mutex_is_locked(&dev_replace->lock)); - atomic_inc(&dev_replace->nesting_level); - mutex_unlock(&dev_replace->lock_management_lock); - return; +void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace, int rw) +{ + if (rw == 1) { + /* write */ + ASSERT(atomic_read(&dev_replace->blocking_readers) == 0); + write_unlock(&dev_replace->lock); + } else { + ASSERT(atomic_read(&dev_replace->read_locks) > 0); + atomic_dec(&dev_replace->read_locks); + read_unlock(&dev_replace->lock); } +} - mutex_unlock(&dev_replace->lock_management_lock); - goto acquire_lock; +/* inc blocking cnt and release read lock */ +void btrfs_dev_replace_set_lock_blocking( + struct btrfs_dev_replace *dev_replace) +{ + /* only set blocking for read lock */ + ASSERT(atomic_read(&dev_replace->read_locks) > 0); + 
atomic_inc(&dev_replace->blocking_readers); + read_unlock(&dev_replace->lock); } -void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace) +/* acquire read lock and dec blocking cnt */ +void btrfs_dev_replace_clear_lock_blocking( + struct btrfs_dev_replace *dev_replace) { - WARN_ON(!mutex_is_locked(&dev_replace->lock)); - mutex_lock(&dev_replace->lock_management_lock); - WARN_ON(atomic_read(&dev_replace->nesting_level) < 1); - WARN_ON(dev_replace->lock_owner != current->pid); - atomic_dec(&dev_replace->nesting_level); - if (atomic_read(&dev_replace->nesting_level) == 0) { - dev_replace->lock_owner = 0; - mutex_unlock(&dev_replace->lock_management_lock); - mutex_unlock(&dev_replace->lock); - } else { - mutex_unlock(&dev_replace->lock_management_lock); - } + /* only set blocking for read lock */ + ASSERT(atomic_read(&dev_replace->read_locks) > 0); + ASSERT(atomic_read(&dev_replace->blocking_readers) > 0); + read_lock(&dev_replace->lock); + if (atomic_dec_and_test(&dev_replace->blocking_readers) && + waitqueue_active(&dev_replace->read_lock_wq)) + wake_up(&dev_replace->read_lock_wq); } void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info) diff --git a/fs/btrfs/dev-replace.h b/fs/btrfs/dev-replace.h index 20035cbbf..29e3ef5f9 100644 --- a/fs/btrfs/dev-replace.h +++ b/fs/btrfs/dev-replace.h @@ -34,8 +34,11 @@ int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info, void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info); int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info); int btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace); -void btrfs_dev_replace_lock(struct btrfs_dev_replace *dev_replace); -void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace); +void btrfs_dev_replace_lock(struct btrfs_dev_replace *dev_replace, int rw); +void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace, int rw); +void btrfs_dev_replace_set_lock_blocking(struct btrfs_dev_replace 
*dev_replace); +void btrfs_dev_replace_clear_lock_blocking( + struct btrfs_dev_replace *dev_replace); static inline void btrfs_dev_replace_stats_inc(atomic64_t *stat_value) { diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d8d68af5a..4e47849d7 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -25,7 +25,6 @@ #include <linux/buffer_head.h> #include <linux/workqueue.h> #include <linux/kthread.h> -#include <linux/freezer.h> #include <linux/slab.h> #include <linux/migrate.h> #include <linux/ratelimit.h> @@ -50,6 +49,7 @@ #include "raid56.h" #include "sysfs.h" #include "qgroup.h" +#include "compression.h" #ifdef CONFIG_X86 #include <asm/cpufeature.h> @@ -110,8 +110,7 @@ int __init btrfs_end_io_wq_init(void) void btrfs_end_io_wq_exit(void) { - if (btrfs_end_io_wq_cache) - kmem_cache_destroy(btrfs_end_io_wq_cache); + kmem_cache_destroy(btrfs_end_io_wq_cache); } /* @@ -303,7 +302,7 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info, err = map_private_extent_buffer(buf, offset, 32, &kaddr, &map_start, &map_len); if (err) - return 1; + return err; cur_len = min(len, map_len - (offset - map_start)); crc = btrfs_csum_data(kaddr + offset - map_start, crc, cur_len); @@ -313,7 +312,7 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info, if (csum_size > sizeof(inline_result)) { result = kzalloc(csum_size, GFP_NOFS); if (!result) - return 1; + return -ENOMEM; } else { result = (char *)&inline_result; } @@ -334,7 +333,7 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info, val, found, btrfs_header_level(buf)); if (result != (char *)&inline_result) kfree(result); - return 1; + return -EUCLEAN; } } else { write_extent_buffer(buf, result, 0, csum_size); @@ -513,11 +512,21 @@ static int csum_dirty_buffer(struct btrfs_fs_info *fs_info, struct page *page) eb = (struct extent_buffer *)page->private; if (page != eb->pages[0]) return 0; + found_start = btrfs_header_bytenr(eb); - if (WARN_ON(found_start != start || !PageUptodate(page))) - return 0; - 
csum_tree_block(fs_info, eb, 0); - return 0; + /* + * Please do not consolidate these warnings into a single if. + * It is useful to know what went wrong. + */ + if (WARN_ON(found_start != start)) + return -EUCLEAN; + if (WARN_ON(!PageUptodate(page))) + return -EUCLEAN; + + ASSERT(memcmp_extent_buffer(eb, fs_info->fsid, + btrfs_header_fsid(), BTRFS_FSID_SIZE) == 0); + + return csum_tree_block(fs_info, eb, 0); } static int check_tree_block_fsid(struct btrfs_fs_info *fs_info, @@ -612,6 +621,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, int found_level; struct extent_buffer *eb; struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; + struct btrfs_fs_info *fs_info = root->fs_info; int ret = 0; int reads_done; @@ -637,21 +647,21 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, found_start = btrfs_header_bytenr(eb); if (found_start != eb->start) { - btrfs_err_rl(eb->fs_info, "bad tree block start %llu %llu", - found_start, eb->start); + btrfs_err_rl(fs_info, "bad tree block start %llu %llu", + found_start, eb->start); ret = -EIO; goto err; } - if (check_tree_block_fsid(root->fs_info, eb)) { - btrfs_err_rl(eb->fs_info, "bad fsid on block %llu", - eb->start); + if (check_tree_block_fsid(fs_info, eb)) { + btrfs_err_rl(fs_info, "bad fsid on block %llu", + eb->start); ret = -EIO; goto err; } found_level = btrfs_header_level(eb); if (found_level >= BTRFS_MAX_LEVEL) { - btrfs_err(root->fs_info, "bad tree block level %d", - (int)btrfs_header_level(eb)); + btrfs_err(fs_info, "bad tree block level %d", + (int)btrfs_header_level(eb)); ret = -EIO; goto err; } @@ -659,11 +669,9 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb), eb, found_level); - ret = csum_tree_block(root->fs_info, eb, 1); - if (ret) { - ret = -EIO; + ret = csum_tree_block(fs_info, eb, 1); + if (ret) goto err; - } /* * If this is a leaf block and it is corrupt, set the corrupt bit so @@ 
-680,7 +688,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, err: if (reads_done && test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) - btree_readahead_hook(root, eb, eb->start, ret); + btree_readahead_hook(fs_info, eb, eb->start, ret); if (ret) { /* @@ -699,14 +707,13 @@ out: static int btree_io_failed_hook(struct page *page, int failed_mirror) { struct extent_buffer *eb; - struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; eb = (struct extent_buffer *)page->private; set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags); eb->read_mirror = failed_mirror; atomic_dec(&eb->io_pages); if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) - btree_readahead_hook(root, eb, eb->start, -EIO); + btree_readahead_hook(eb->fs_info, eb, eb->start, -EIO); return -EIO; /* we fixed nothing */ } @@ -816,7 +823,7 @@ static void run_one_async_done(struct btrfs_work *work) waitqueue_active(&fs_info->async_submit_wait)) wake_up(&fs_info->async_submit_wait); - /* If an error occured we just want to clean up the bio and move on */ + /* If an error occurred we just want to clean up the bio and move on */ if (async->error) { async->bio->bi_error = async->error; bio_endio(async->bio); @@ -931,7 +938,7 @@ static int check_async_write(struct inode *inode, unsigned long bio_flags) if (bio_flags & EXTENT_BIO_TREE_LOG) return 0; #ifdef CONFIG_X86 - if (static_cpu_has_safe(X86_FEATURE_XMM4_2)) + if (static_cpu_has(X86_FEATURE_XMM4_2)) return 0; #endif return 1; @@ -1055,7 +1062,7 @@ static void btree_invalidatepage(struct page *page, unsigned int offset, (unsigned long long)page_offset(page)); ClearPagePrivate(page); set_page_private(page, 0); - page_cache_release(page); + put_page(page); } } @@ -1296,9 +1303,10 @@ static void __setup_root(u32 nodesize, u32 sectorsize, u32 stripesize, spin_lock_init(&root->root_item_lock); } -static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info) +static struct btrfs_root *btrfs_alloc_root(struct 
btrfs_fs_info *fs_info, + gfp_t flags) { - struct btrfs_root *root = kzalloc(sizeof(*root), GFP_NOFS); + struct btrfs_root *root = kzalloc(sizeof(*root), flags); if (root) root->fs_info = fs_info; return root; @@ -1310,7 +1318,7 @@ struct btrfs_root *btrfs_alloc_dummy_root(void) { struct btrfs_root *root; - root = btrfs_alloc_root(NULL); + root = btrfs_alloc_root(NULL, GFP_KERNEL); if (!root) return ERR_PTR(-ENOMEM); __setup_root(4096, 4096, 4096, root, NULL, 1); @@ -1332,7 +1340,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, int ret = 0; uuid_le uuid; - root = btrfs_alloc_root(fs_info); + root = btrfs_alloc_root(fs_info, GFP_KERNEL); if (!root) return ERR_PTR(-ENOMEM); @@ -1408,7 +1416,7 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, struct btrfs_root *tree_root = fs_info->tree_root; struct extent_buffer *leaf; - root = btrfs_alloc_root(fs_info); + root = btrfs_alloc_root(fs_info, GFP_NOFS); if (!root) return ERR_PTR(-ENOMEM); @@ -1506,7 +1514,7 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, if (!path) return ERR_PTR(-ENOMEM); - root = btrfs_alloc_root(fs_info); + root = btrfs_alloc_root(fs_info, GFP_NOFS); if (!root) { ret = -ENOMEM; goto alloc_fail; @@ -1756,7 +1764,7 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) if (err) return err; - bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE; + bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_SIZE; bdi->congested_fn = btrfs_congested_fn; bdi->congested_data = info; bdi->capabilities |= BDI_CAP_CGROUP_WRITEBACK; @@ -1920,14 +1928,12 @@ sleep: if (unlikely(test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))) btrfs_cleanup_transaction(root); - if (!try_to_freeze()) { - set_current_state(TASK_INTERRUPTIBLE); - if (!kthread_should_stop() && - (!btrfs_transaction_blocked(root->fs_info) || - cannot_commit)) - schedule_timeout(delay); - __set_current_state(TASK_RUNNING); - } + 
set_current_state(TASK_INTERRUPTIBLE); + if (!kthread_should_stop() && + (!btrfs_transaction_blocked(root->fs_info) || + cannot_commit)) + schedule_timeout(delay); + __set_current_state(TASK_RUNNING); } while (!kthread_should_stop()); return 0; } @@ -2272,9 +2278,11 @@ static void btrfs_init_dev_replace_locks(struct btrfs_fs_info *fs_info) fs_info->dev_replace.lock_owner = 0; atomic_set(&fs_info->dev_replace.nesting_level, 0); mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount); - mutex_init(&fs_info->dev_replace.lock_management_lock); - mutex_init(&fs_info->dev_replace.lock); + rwlock_init(&fs_info->dev_replace.lock); + atomic_set(&fs_info->dev_replace.read_locks, 0); + atomic_set(&fs_info->dev_replace.blocking_readers, 0); init_waitqueue_head(&fs_info->replace_wait); + init_waitqueue_head(&fs_info->dev_replace.read_lock_wq); } static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info) @@ -2385,7 +2393,7 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info, return -EIO; } - log_tree_root = btrfs_alloc_root(fs_info); + log_tree_root = btrfs_alloc_root(fs_info, GFP_KERNEL); if (!log_tree_root) return -ENOMEM; @@ -2510,8 +2518,8 @@ int open_ctree(struct super_block *sb, int backup_index = 0; int max_active; - tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info); - chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info); + tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info, GFP_KERNEL); + chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info, GFP_KERNEL); if (!tree_root || !chunk_root) { err = -ENOMEM; goto fail; @@ -2534,7 +2542,7 @@ int open_ctree(struct super_block *sb, err = ret; goto fail_bdi; } - fs_info->dirty_metadata_batch = PAGE_CACHE_SIZE * + fs_info->dirty_metadata_batch = PAGE_SIZE * (1 + ilog2(nr_cpu_ids)); ret = percpu_counter_init(&fs_info->delalloc_bytes, 0, GFP_KERNEL); @@ -2603,6 +2611,7 @@ int open_ctree(struct super_block *sb, atomic_set(&fs_info->nr_async_bios, 0); atomic_set(&fs_info->defrag_running, 
0); atomic_set(&fs_info->qgroup_op_seq, 0); + atomic_set(&fs_info->reada_works_cnt, 0); atomic64_set(&fs_info->tree_mod_seq, 0); fs_info->sb = sb; fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE; @@ -2622,7 +2631,7 @@ int open_ctree(struct super_block *sb, INIT_LIST_HEAD(&fs_info->ordered_roots); spin_lock_init(&fs_info->ordered_root_lock); fs_info->delayed_root = kmalloc(sizeof(struct btrfs_delayed_root), - GFP_NOFS); + GFP_KERNEL); if (!fs_info->delayed_root) { err = -ENOMEM; goto fail_iput; @@ -2750,7 +2759,7 @@ int open_ctree(struct super_block *sb, */ fs_info->compress_type = BTRFS_COMPRESS_ZLIB; - ret = btrfs_parse_options(tree_root, options); + ret = btrfs_parse_options(tree_root, options, sb->s_flags); if (ret) { err = ret; goto fail_alloc; @@ -2778,7 +2787,7 @@ int open_ctree(struct super_block *sb, * flag our filesystem as having big metadata blocks if * they are bigger than the page size */ - if (btrfs_super_nodesize(disk_super) > PAGE_CACHE_SIZE) { + if (btrfs_super_nodesize(disk_super) > PAGE_SIZE) { if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA)) printk(KERN_INFO "BTRFS: flagging fs with big metadata feature\n"); features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA; @@ -2828,7 +2837,7 @@ int open_ctree(struct super_block *sb, fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, - SZ_4M / PAGE_CACHE_SIZE); + SZ_4M / PAGE_SIZE); tree_root->nodesize = nodesize; tree_root->sectorsize = sectorsize; @@ -3029,8 +3038,9 @@ retry_root_backup: if (ret) goto fail_trans_kthread; - /* do not make disk changes in broken FS */ - if (btrfs_super_log_root(disk_super) != 0) { + /* do not make disk changes in broken FS or nologreplay is given */ + if (btrfs_super_log_root(disk_super) != 0 && + !btrfs_test_opt(tree_root, NOLOGREPLAY)) { ret = btrfs_replay_log(fs_info, fs_devices); if (ret) { err = ret; @@ -3146,6 +3156,12 @@ retry_root_backup: fs_info->open = 1; + /* + * backuproot only affect mount behavior, 
and if open_ctree succeeded, + * no need to keep the flag + */ + btrfs_clear_opt(fs_info->mount_opt, USEBACKUPROOT); + return 0; fail_qgroup: @@ -3200,7 +3216,7 @@ fail: return err; recovery_tree_root: - if (!btrfs_test_opt(tree_root, RECOVERY)) + if (!btrfs_test_opt(tree_root, USEBACKUPROOT)) goto fail_tree_roots; free_root_pointers(fs_info, 0); @@ -4060,9 +4076,9 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, ret = -EINVAL; } /* Only PAGE SIZE is supported yet */ - if (sectorsize != PAGE_CACHE_SIZE) { + if (sectorsize != PAGE_SIZE) { printk(KERN_ERR "BTRFS: sectorsize %llu not supported yet, only support %lu\n", - sectorsize, PAGE_CACHE_SIZE); + sectorsize, PAGE_SIZE); ret = -EINVAL; } if (!is_power_of_2(nodesize) || nodesize < sectorsize || diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e2287c7c1..84e060eb0 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3452,7 +3452,7 @@ again: num_pages = 1; num_pages *= 16; - num_pages *= PAGE_CACHE_SIZE; + num_pages *= PAGE_SIZE; ret = btrfs_check_data_free_space(inode, 0, num_pages); if (ret) @@ -4639,7 +4639,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, loops = 0; while (delalloc_bytes && loops < 3) { max_reclaim = min(delalloc_bytes, to_reclaim); - nr_pages = max_reclaim >> PAGE_CACHE_SHIFT; + nr_pages = max_reclaim >> PAGE_SHIFT; btrfs_writeback_inodes_sb_nr(root, nr_pages, items); /* * We need to wait for the async pages to actually start before @@ -4838,7 +4838,7 @@ static inline int need_do_async_reclaim(struct btrfs_space_info *space_info, u64 thresh = div_factor_fine(space_info->total_bytes, 98); /* If we're just plain full then async reclaim just slows us down. 
*/ - if (space_info->bytes_used >= thresh) + if ((space_info->bytes_used + space_info->bytes_reserved) >= thresh) return 0; return (used >= thresh && !btrfs_fs_closing(fs_info) && @@ -5373,27 +5373,33 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info) block_rsv->size = min_t(u64, num_bytes, SZ_512M); - num_bytes = sinfo->bytes_used + sinfo->bytes_pinned + - sinfo->bytes_reserved + sinfo->bytes_readonly + - sinfo->bytes_may_use; - - if (sinfo->total_bytes > num_bytes) { - num_bytes = sinfo->total_bytes - num_bytes; - block_rsv->reserved += num_bytes; - sinfo->bytes_may_use += num_bytes; - trace_btrfs_space_reservation(fs_info, "space_info", - sinfo->flags, num_bytes, 1); - } - - if (block_rsv->reserved >= block_rsv->size) { + if (block_rsv->reserved < block_rsv->size) { + num_bytes = sinfo->bytes_used + sinfo->bytes_pinned + + sinfo->bytes_reserved + sinfo->bytes_readonly + + sinfo->bytes_may_use; + if (sinfo->total_bytes > num_bytes) { + num_bytes = sinfo->total_bytes - num_bytes; + num_bytes = min(num_bytes, + block_rsv->size - block_rsv->reserved); + block_rsv->reserved += num_bytes; + sinfo->bytes_may_use += num_bytes; + trace_btrfs_space_reservation(fs_info, "space_info", + sinfo->flags, num_bytes, + 1); + } + } else if (block_rsv->reserved > block_rsv->size) { num_bytes = block_rsv->reserved - block_rsv->size; sinfo->bytes_may_use -= num_bytes; trace_btrfs_space_reservation(fs_info, "space_info", sinfo->flags, num_bytes, 0); block_rsv->reserved = block_rsv->size; - block_rsv->full = 1; } + if (block_rsv->reserved == block_rsv->size) + block_rsv->full = 1; + else + block_rsv->full = 0; + spin_unlock(&block_rsv->lock); spin_unlock(&sinfo->lock); } @@ -5752,7 +5758,7 @@ out_fail: /* * This is tricky, but first we need to figure out how much we - * free'd from any free-ers that occured during this + * free'd from any free-ers that occurred during this * reservation, so we reset ->csum_bytes to the csum_bytes * before we dropped our lock, and 
then call the free for the * number of bytes that were freed while we were trying our @@ -7018,7 +7024,7 @@ btrfs_lock_cluster(struct btrfs_block_group_cache *block_group, struct btrfs_free_cluster *cluster, int delalloc) { - struct btrfs_block_group_cache *used_bg; + struct btrfs_block_group_cache *used_bg = NULL; bool locked = false; again: spin_lock(&cluster->refill_lock); @@ -9380,15 +9386,23 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) u64 dev_min = 1; u64 dev_nr = 0; u64 target; + int debug; int index; int full = 0; int ret = 0; + debug = btrfs_test_opt(root, ENOSPC_DEBUG); + block_group = btrfs_lookup_block_group(root->fs_info, bytenr); /* odd, couldn't find the block group, leave it alone */ - if (!block_group) + if (!block_group) { + if (debug) + btrfs_warn(root->fs_info, + "can't find block group for bytenr %llu", + bytenr); return -1; + } min_free = btrfs_block_group_used(&block_group->item); @@ -9442,8 +9456,13 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) * this is just a balance, so if we were marked as full * we know there is no space for a new chunk */ - if (full) + if (full) { + if (debug) + btrfs_warn(root->fs_info, + "no space to alloc new chunk for block group %llu", + block_group->key.objectid); goto out; + } index = get_block_group_index(block_group); } @@ -9490,6 +9509,10 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) ret = -1; } } + if (debug && ret == -1) + btrfs_warn(root->fs_info, + "no space to allocate a new chunk for block group %llu", + block_group->key.objectid); mutex_unlock(&root->fs_info->chunk_mutex); btrfs_end_transaction(trans, root); out: diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 392592dc7..d247fc0ee 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -206,10 +206,8 @@ void extent_io_exit(void) * destroy caches. 
*/ rcu_barrier(); - if (extent_state_cache) - kmem_cache_destroy(extent_state_cache); - if (extent_buffer_cache) - kmem_cache_destroy(extent_buffer_cache); + kmem_cache_destroy(extent_state_cache); + kmem_cache_destroy(extent_buffer_cache); if (btrfs_bioset) bioset_free(btrfs_bioset); } @@ -232,7 +230,7 @@ static struct extent_state *alloc_extent_state(gfp_t mask) if (!state) return state; state->state = 0; - state->private = 0; + state->failrec = NULL; RB_CLEAR_NODE(&state->rb_node); btrfs_leak_debug_add(&state->leak_list, &states); atomic_set(&state->refs, 1); @@ -1365,23 +1363,23 @@ int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end) void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end) { - unsigned long index = start >> PAGE_CACHE_SHIFT; - unsigned long end_index = end >> PAGE_CACHE_SHIFT; + unsigned long index = start >> PAGE_SHIFT; + unsigned long end_index = end >> PAGE_SHIFT; struct page *page; while (index <= end_index) { page = find_get_page(inode->i_mapping, index); BUG_ON(!page); /* Pages should be in the extent_io_tree */ clear_page_dirty_for_io(page); - page_cache_release(page); + put_page(page); index++; } } void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end) { - unsigned long index = start >> PAGE_CACHE_SHIFT; - unsigned long end_index = end >> PAGE_CACHE_SHIFT; + unsigned long index = start >> PAGE_SHIFT; + unsigned long end_index = end >> PAGE_SHIFT; struct page *page; while (index <= end_index) { @@ -1389,7 +1387,7 @@ void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end) BUG_ON(!page); /* Pages should be in the extent_io_tree */ __set_page_dirty_nobuffers(page); account_page_redirty(page); - page_cache_release(page); + put_page(page); index++; } } @@ -1399,15 +1397,15 @@ void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end) */ static void set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) { - unsigned long index = start >> 
PAGE_CACHE_SHIFT; - unsigned long end_index = end >> PAGE_CACHE_SHIFT; + unsigned long index = start >> PAGE_SHIFT; + unsigned long end_index = end >> PAGE_SHIFT; struct page *page; while (index <= end_index) { page = find_get_page(tree->mapping, index); BUG_ON(!page); /* Pages should be in the extent_io_tree */ set_page_writeback(page); - page_cache_release(page); + put_page(page); index++; } } @@ -1558,8 +1556,8 @@ static noinline void __unlock_for_delalloc(struct inode *inode, { int ret; struct page *pages[16]; - unsigned long index = start >> PAGE_CACHE_SHIFT; - unsigned long end_index = end >> PAGE_CACHE_SHIFT; + unsigned long index = start >> PAGE_SHIFT; + unsigned long end_index = end >> PAGE_SHIFT; unsigned long nr_pages = end_index - index + 1; int i; @@ -1573,7 +1571,7 @@ static noinline void __unlock_for_delalloc(struct inode *inode, for (i = 0; i < ret; i++) { if (pages[i] != locked_page) unlock_page(pages[i]); - page_cache_release(pages[i]); + put_page(pages[i]); } nr_pages -= ret; index += ret; @@ -1586,9 +1584,9 @@ static noinline int lock_delalloc_pages(struct inode *inode, u64 delalloc_start, u64 delalloc_end) { - unsigned long index = delalloc_start >> PAGE_CACHE_SHIFT; + unsigned long index = delalloc_start >> PAGE_SHIFT; unsigned long start_index = index; - unsigned long end_index = delalloc_end >> PAGE_CACHE_SHIFT; + unsigned long end_index = delalloc_end >> PAGE_SHIFT; unsigned long pages_locked = 0; struct page *pages[16]; unsigned long nrpages; @@ -1621,11 +1619,11 @@ static noinline int lock_delalloc_pages(struct inode *inode, pages[i]->mapping != inode->i_mapping) { ret = -EAGAIN; unlock_page(pages[i]); - page_cache_release(pages[i]); + put_page(pages[i]); goto done; } } - page_cache_release(pages[i]); + put_page(pages[i]); pages_locked++; } nrpages -= ret; @@ -1638,7 +1636,7 @@ done: __unlock_for_delalloc(inode, locked_page, delalloc_start, ((u64)(start_index + pages_locked - 1)) << - PAGE_CACHE_SHIFT); + PAGE_SHIFT); } return ret; } @@ 
-1698,7 +1696,7 @@ again: free_extent_state(cached_state); cached_state = NULL; if (!loops) { - max_bytes = PAGE_CACHE_SIZE; + max_bytes = PAGE_SIZE; loops = 1; goto again; } else { @@ -1737,8 +1735,8 @@ void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end, struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; int ret; struct page *pages[16]; - unsigned long index = start >> PAGE_CACHE_SHIFT; - unsigned long end_index = end >> PAGE_CACHE_SHIFT; + unsigned long index = start >> PAGE_SHIFT; + unsigned long end_index = end >> PAGE_SHIFT; unsigned long nr_pages = end_index - index + 1; int i; @@ -1759,7 +1757,7 @@ void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end, SetPagePrivate2(pages[i]); if (pages[i] == locked_page) { - page_cache_release(pages[i]); + put_page(pages[i]); continue; } if (page_ops & PAGE_CLEAR_DIRTY) @@ -1772,7 +1770,7 @@ void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end, end_page_writeback(pages[i]); if (page_ops & PAGE_UNLOCK) unlock_page(pages[i]); - page_cache_release(pages[i]); + put_page(pages[i]); } nr_pages -= ret; index += ret; @@ -1844,7 +1842,8 @@ out: * set the private field for a given byte offset in the tree. If there isn't * an extent_state there already, this does nothing. 
*/ -static int set_state_private(struct extent_io_tree *tree, u64 start, u64 private) +static noinline int set_state_failrec(struct extent_io_tree *tree, u64 start, + struct io_failure_record *failrec) { struct rb_node *node; struct extent_state *state; @@ -1865,13 +1864,14 @@ static int set_state_private(struct extent_io_tree *tree, u64 start, u64 private ret = -ENOENT; goto out; } - state->private = private; + state->failrec = failrec; out: spin_unlock(&tree->lock); return ret; } -int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private) +static noinline int get_state_failrec(struct extent_io_tree *tree, u64 start, + struct io_failure_record **failrec) { struct rb_node *node; struct extent_state *state; @@ -1892,7 +1892,7 @@ int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private) ret = -ENOENT; goto out; } - *private = state->private; + *failrec = state->failrec; out: spin_unlock(&tree->lock); return ret; @@ -1961,7 +1961,7 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, static void check_page_uptodate(struct extent_io_tree *tree, struct page *page) { u64 start = page_offset(page); - u64 end = start + PAGE_CACHE_SIZE - 1; + u64 end = start + PAGE_SIZE - 1; if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL)) SetPageUptodate(page); } @@ -1972,7 +1972,7 @@ int free_io_failure(struct inode *inode, struct io_failure_record *rec) int err = 0; struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; - set_state_private(failure_tree, rec->start, 0); + set_state_failrec(failure_tree, rec->start, NULL); ret = clear_extent_bits(failure_tree, rec->start, rec->start + rec->len - 1, EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS); @@ -2071,11 +2071,11 @@ int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb, struct page *p = eb->pages[i]; ret = repair_io_failure(root->fs_info->btree_inode, start, - PAGE_CACHE_SIZE, start, p, + PAGE_SIZE, start, p, start - page_offset(p), 
mirror_num); if (ret) break; - start += PAGE_CACHE_SIZE; + start += PAGE_SIZE; } return ret; @@ -2089,7 +2089,6 @@ int clean_io_failure(struct inode *inode, u64 start, struct page *page, unsigned int pg_offset) { u64 private; - u64 private_failure; struct io_failure_record *failrec; struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; struct extent_state *state; @@ -2102,12 +2101,11 @@ int clean_io_failure(struct inode *inode, u64 start, struct page *page, if (!ret) return 0; - ret = get_state_private(&BTRFS_I(inode)->io_failure_tree, start, - &private_failure); + ret = get_state_failrec(&BTRFS_I(inode)->io_failure_tree, start, + &failrec); if (ret) return 0; - failrec = (struct io_failure_record *)(unsigned long) private_failure; BUG_ON(!failrec->this_mirror); if (failrec->in_validation) { @@ -2167,7 +2165,7 @@ void btrfs_free_io_failure_record(struct inode *inode, u64 start, u64 end) next = next_state(state); - failrec = (struct io_failure_record *)(unsigned long)state->private; + failrec = state->failrec; free_extent_state(state); kfree(failrec); @@ -2177,10 +2175,9 @@ void btrfs_free_io_failure_record(struct inode *inode, u64 start, u64 end) } int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end, - struct io_failure_record **failrec_ret) + struct io_failure_record **failrec_ret) { struct io_failure_record *failrec; - u64 private; struct extent_map *em; struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; @@ -2188,7 +2185,7 @@ int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end, int ret; u64 logical; - ret = get_state_private(failure_tree, start, &private); + ret = get_state_failrec(failure_tree, start, &failrec); if (ret) { failrec = kzalloc(sizeof(*failrec), GFP_NOFS); if (!failrec) @@ -2237,8 +2234,7 @@ int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end, ret = set_extent_bits(failure_tree, start, end, EXTENT_LOCKED | 
EXTENT_DIRTY, GFP_NOFS); if (ret >= 0) - ret = set_state_private(failure_tree, start, - (u64)(unsigned long)failrec); + ret = set_state_failrec(failure_tree, start, failrec); /* set the bits in the inode's tree */ if (ret >= 0) ret = set_extent_bits(tree, start, end, EXTENT_DAMAGED, @@ -2248,7 +2244,6 @@ int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end, return ret; } } else { - failrec = (struct io_failure_record *)(unsigned long)private; pr_debug("Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu, validation=%d\n", failrec->logical, failrec->start, failrec->len, failrec->in_validation); @@ -2471,8 +2466,8 @@ static void end_bio_extent_writepage(struct bio *bio) * advance bv_offset and adjust bv_len to compensate. * Print a warning for nonzero offsets, and an error * if they don't add up to a full page. */ - if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE) { - if (bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE) + if (bvec->bv_offset || bvec->bv_len != PAGE_SIZE) { + if (bvec->bv_offset + bvec->bv_len != PAGE_SIZE) btrfs_err(BTRFS_I(page->mapping->host)->root->fs_info, "partial page write in btrfs with offset %u and length %u", bvec->bv_offset, bvec->bv_len); @@ -2546,8 +2541,8 @@ static void end_bio_extent_readpage(struct bio *bio) * advance bv_offset and adjust bv_len to compensate. * Print a warning for nonzero offsets, and an error * if they don't add up to a full page. 
*/ - if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE) { - if (bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE) + if (bvec->bv_offset || bvec->bv_len != PAGE_SIZE) { + if (bvec->bv_offset + bvec->bv_len != PAGE_SIZE) btrfs_err(BTRFS_I(page->mapping->host)->root->fs_info, "partial page read in btrfs with offset %u and length %u", bvec->bv_offset, bvec->bv_len); @@ -2603,13 +2598,13 @@ static void end_bio_extent_readpage(struct bio *bio) readpage_ok: if (likely(uptodate)) { loff_t i_size = i_size_read(inode); - pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; + pgoff_t end_index = i_size >> PAGE_SHIFT; unsigned off; /* Zero out the end if this page straddles i_size */ - off = i_size & (PAGE_CACHE_SIZE-1); + off = i_size & (PAGE_SIZE-1); if (page->index == end_index && off) - zero_user_segment(page, off, PAGE_CACHE_SIZE); + zero_user_segment(page, off, PAGE_SIZE); SetPageUptodate(page); } else { ClearPageUptodate(page); @@ -2773,7 +2768,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, struct bio *bio; int contig = 0; int old_compressed = prev_bio_flags & EXTENT_BIO_COMPRESSED; - size_t page_size = min_t(size_t, size, PAGE_CACHE_SIZE); + size_t page_size = min_t(size_t, size, PAGE_SIZE); if (bio_ret && *bio_ret) { bio = *bio_ret; @@ -2826,7 +2821,7 @@ static void attach_extent_buffer_page(struct extent_buffer *eb, { if (!PagePrivate(page)) { SetPagePrivate(page); - page_cache_get(page); + get_page(page); set_page_private(page, (unsigned long)eb); } else { WARN_ON(page->private != (unsigned long)eb); @@ -2837,7 +2832,7 @@ void set_page_extent_mapped(struct page *page) { if (!PagePrivate(page)) { SetPagePrivate(page); - page_cache_get(page); + get_page(page); set_page_private(page, EXTENT_PAGE_PRIVATE); } } @@ -2885,7 +2880,7 @@ static int __do_readpage(struct extent_io_tree *tree, { struct inode *inode = page->mapping->host; u64 start = page_offset(page); - u64 page_end = start + PAGE_CACHE_SIZE - 1; + u64 page_end = start + PAGE_SIZE - 1; 
u64 end; u64 cur = start; u64 extent_offset; @@ -2914,12 +2909,12 @@ static int __do_readpage(struct extent_io_tree *tree, } } - if (page->index == last_byte >> PAGE_CACHE_SHIFT) { + if (page->index == last_byte >> PAGE_SHIFT) { char *userpage; - size_t zero_offset = last_byte & (PAGE_CACHE_SIZE - 1); + size_t zero_offset = last_byte & (PAGE_SIZE - 1); if (zero_offset) { - iosize = PAGE_CACHE_SIZE - zero_offset; + iosize = PAGE_SIZE - zero_offset; userpage = kmap_atomic(page); memset(userpage + zero_offset, 0, iosize); flush_dcache_page(page); @@ -2927,14 +2922,14 @@ static int __do_readpage(struct extent_io_tree *tree, } } while (cur <= end) { - unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1; + unsigned long pnr = (last_byte >> PAGE_SHIFT) + 1; bool force_bio_submit = false; if (cur >= last_byte) { char *userpage; struct extent_state *cached = NULL; - iosize = PAGE_CACHE_SIZE - pg_offset; + iosize = PAGE_SIZE - pg_offset; userpage = kmap_atomic(page); memset(userpage + pg_offset, 0, iosize); flush_dcache_page(page); @@ -3117,7 +3112,7 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree, for (index = 0; index < nr_pages; index++) { __do_readpage(tree, pages[index], get_extent, em_cached, bio, mirror_num, bio_flags, rw, prev_em_start); - page_cache_release(pages[index]); + put_page(pages[index]); } } @@ -3139,10 +3134,10 @@ static void __extent_readpages(struct extent_io_tree *tree, page_start = page_offset(pages[index]); if (!end) { start = page_start; - end = start + PAGE_CACHE_SIZE - 1; + end = start + PAGE_SIZE - 1; first_index = index; } else if (end + 1 == page_start) { - end += PAGE_CACHE_SIZE; + end += PAGE_SIZE; } else { __do_contiguous_readpages(tree, &pages[first_index], index - first_index, start, @@ -3150,7 +3145,7 @@ static void __extent_readpages(struct extent_io_tree *tree, bio, mirror_num, bio_flags, rw, prev_em_start); start = page_start; - end = start + PAGE_CACHE_SIZE - 1; + end = start + PAGE_SIZE - 1; 
first_index = index; } } @@ -3172,12 +3167,13 @@ static int __extent_read_full_page(struct extent_io_tree *tree, struct inode *inode = page->mapping->host; struct btrfs_ordered_extent *ordered; u64 start = page_offset(page); - u64 end = start + PAGE_CACHE_SIZE - 1; + u64 end = start + PAGE_SIZE - 1; int ret; while (1) { lock_extent(tree, start, end); - ordered = btrfs_lookup_ordered_extent(inode, start); + ordered = btrfs_lookup_ordered_range(inode, start, + PAGE_SIZE); if (!ordered) break; unlock_extent(tree, start, end); @@ -3231,7 +3227,7 @@ static noinline_for_stack int writepage_delalloc(struct inode *inode, unsigned long *nr_written) { struct extent_io_tree *tree = epd->tree; - u64 page_end = delalloc_start + PAGE_CACHE_SIZE - 1; + u64 page_end = delalloc_start + PAGE_SIZE - 1; u64 nr_delalloc; u64 delalloc_to_write = 0; u64 delalloc_end = 0; @@ -3268,13 +3264,11 @@ static noinline_for_stack int writepage_delalloc(struct inode *inode, goto done; } /* - * delalloc_end is already one less than the total - * length, so we don't subtract one from - * PAGE_CACHE_SIZE + * delalloc_end is already one less than the total length, so + * we don't subtract one from PAGE_SIZE */ delalloc_to_write += (delalloc_end - delalloc_start + - PAGE_CACHE_SIZE) >> - PAGE_CACHE_SHIFT; + PAGE_SIZE) >> PAGE_SHIFT; delalloc_start = delalloc_end + 1; } if (wbc->nr_to_write < delalloc_to_write) { @@ -3323,7 +3317,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode, { struct extent_io_tree *tree = epd->tree; u64 start = page_offset(page); - u64 page_end = start + PAGE_CACHE_SIZE - 1; + u64 page_end = start + PAGE_SIZE - 1; u64 end; u64 cur = start; u64 extent_offset; @@ -3438,7 +3432,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode, if (ret) { SetPageError(page); } else { - unsigned long max_nr = (i_size >> PAGE_CACHE_SHIFT) + 1; + unsigned long max_nr = (i_size >> PAGE_SHIFT) + 1; set_range_writeback(tree, cur, cur + iosize - 1); if 
(!PageWriteback(page)) { @@ -3481,12 +3475,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, struct inode *inode = page->mapping->host; struct extent_page_data *epd = data; u64 start = page_offset(page); - u64 page_end = start + PAGE_CACHE_SIZE - 1; + u64 page_end = start + PAGE_SIZE - 1; int ret; int nr = 0; size_t pg_offset = 0; loff_t i_size = i_size_read(inode); - unsigned long end_index = i_size >> PAGE_CACHE_SHIFT; + unsigned long end_index = i_size >> PAGE_SHIFT; int write_flags; unsigned long nr_written = 0; @@ -3501,10 +3495,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, ClearPageError(page); - pg_offset = i_size & (PAGE_CACHE_SIZE - 1); + pg_offset = i_size & (PAGE_SIZE - 1); if (page->index > end_index || (page->index == end_index && !pg_offset)) { - page->mapping->a_ops->invalidatepage(page, 0, PAGE_CACHE_SIZE); + page->mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE); unlock_page(page); return 0; } @@ -3514,7 +3508,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, userpage = kmap_atomic(page); memset(userpage + pg_offset, 0, - PAGE_CACHE_SIZE - pg_offset); + PAGE_SIZE - pg_offset); kunmap_atomic(userpage); flush_dcache_page(page); } @@ -3752,7 +3746,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb, clear_page_dirty_for_io(p); set_page_writeback(p); ret = submit_extent_page(rw, tree, wbc, p, offset >> 9, - PAGE_CACHE_SIZE, 0, bdev, &epd->bio, + PAGE_SIZE, 0, bdev, &epd->bio, -1, end_bio_extent_buffer_writepage, 0, epd->bio_flags, bio_flags, false); epd->bio_flags = bio_flags; @@ -3764,7 +3758,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb, ret = -EIO; break; } - offset += PAGE_CACHE_SIZE; + offset += PAGE_SIZE; update_nr_written(p, wbc, 1); unlock_page(p); } @@ -3808,8 +3802,8 @@ int btree_write_cache_pages(struct address_space *mapping, index = mapping->writeback_index; /* Start from prev 
offset */ end = -1; } else { - index = wbc->range_start >> PAGE_CACHE_SHIFT; - end = wbc->range_end >> PAGE_CACHE_SHIFT; + index = wbc->range_start >> PAGE_SHIFT; + end = wbc->range_end >> PAGE_SHIFT; scanned = 1; } if (wbc->sync_mode == WB_SYNC_ALL) @@ -3952,8 +3946,8 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, index = mapping->writeback_index; /* Start from prev offset */ end = -1; } else { - index = wbc->range_start >> PAGE_CACHE_SHIFT; - end = wbc->range_end >> PAGE_CACHE_SHIFT; + index = wbc->range_start >> PAGE_SHIFT; + end = wbc->range_end >> PAGE_SHIFT; scanned = 1; } if (wbc->sync_mode == WB_SYNC_ALL) @@ -4087,8 +4081,8 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode, int ret = 0; struct address_space *mapping = inode->i_mapping; struct page *page; - unsigned long nr_pages = (end - start + PAGE_CACHE_SIZE) >> - PAGE_CACHE_SHIFT; + unsigned long nr_pages = (end - start + PAGE_SIZE) >> + PAGE_SHIFT; struct extent_page_data epd = { .bio = NULL, @@ -4106,18 +4100,18 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode, }; while (start <= end) { - page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT); + page = find_get_page(mapping, start >> PAGE_SHIFT); if (clear_page_dirty_for_io(page)) ret = __extent_writepage(page, &wbc_writepages, &epd); else { if (tree->ops && tree->ops->writepage_end_io_hook) tree->ops->writepage_end_io_hook(page, start, - start + PAGE_CACHE_SIZE - 1, + start + PAGE_SIZE - 1, NULL, 1); unlock_page(page); } - page_cache_release(page); - start += PAGE_CACHE_SIZE; + put_page(page); + start += PAGE_SIZE; } flush_epd_write_bio(&epd); @@ -4167,7 +4161,7 @@ int extent_readpages(struct extent_io_tree *tree, list_del(&page->lru); if (add_to_page_cache_lru(page, mapping, page->index, GFP_NOFS)) { - page_cache_release(page); + put_page(page); continue; } @@ -4201,7 +4195,7 @@ int extent_invalidatepage(struct extent_io_tree *tree, { struct extent_state 
*cached_state = NULL; u64 start = page_offset(page); - u64 end = start + PAGE_CACHE_SIZE - 1; + u64 end = start + PAGE_SIZE - 1; size_t blocksize = page->mapping->host->i_sb->s_blocksize; start += ALIGN(offset, blocksize); @@ -4227,7 +4221,7 @@ static int try_release_extent_state(struct extent_map_tree *map, struct page *page, gfp_t mask) { u64 start = page_offset(page); - u64 end = start + PAGE_CACHE_SIZE - 1; + u64 end = start + PAGE_SIZE - 1; int ret = 1; if (test_range_bit(tree, start, end, @@ -4266,7 +4260,7 @@ int try_release_extent_mapping(struct extent_map_tree *map, { struct extent_map *em; u64 start = page_offset(page); - u64 end = start + PAGE_CACHE_SIZE - 1; + u64 end = start + PAGE_SIZE - 1; if (gfpflags_allow_blocking(mask) && page->mapping->host->i_size > SZ_16M) { @@ -4591,14 +4585,14 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb) ClearPagePrivate(page); set_page_private(page, 0); /* One for the page private */ - page_cache_release(page); + put_page(page); } if (mapped) spin_unlock(&page->mapping->private_lock); /* One for when we alloced the page */ - page_cache_release(page); + put_page(page); } while (index != 0); } @@ -4783,7 +4777,7 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info, rcu_read_lock(); eb = radix_tree_lookup(&fs_info->buffer_radix, - start >> PAGE_CACHE_SHIFT); + start >> PAGE_SHIFT); if (eb && atomic_inc_not_zero(&eb->refs)) { rcu_read_unlock(); /* @@ -4833,7 +4827,7 @@ again: goto free_eb; spin_lock(&fs_info->buffer_lock); ret = radix_tree_insert(&fs_info->buffer_radix, - start >> PAGE_CACHE_SHIFT, eb); + start >> PAGE_SHIFT, eb); spin_unlock(&fs_info->buffer_lock); radix_tree_preload_end(); if (ret == -EEXIST) { @@ -4866,7 +4860,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, unsigned long len = fs_info->tree_root->nodesize; unsigned long num_pages = num_extent_pages(start, len); unsigned long i; - unsigned long index = start >> PAGE_CACHE_SHIFT; + 
unsigned long index = start >> PAGE_SHIFT; struct extent_buffer *eb; struct extent_buffer *exists = NULL; struct page *p; @@ -4900,7 +4894,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, if (atomic_inc_not_zero(&exists->refs)) { spin_unlock(&mapping->private_lock); unlock_page(p); - page_cache_release(p); + put_page(p); mark_extent_buffer_accessed(exists, p); goto free_eb; } @@ -4912,7 +4906,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, */ ClearPagePrivate(p); WARN_ON(PageDirty(p)); - page_cache_release(p); + put_page(p); } attach_extent_buffer_page(eb, p); spin_unlock(&mapping->private_lock); @@ -4935,7 +4929,7 @@ again: spin_lock(&fs_info->buffer_lock); ret = radix_tree_insert(&fs_info->buffer_radix, - start >> PAGE_CACHE_SHIFT, eb); + start >> PAGE_SHIFT, eb); spin_unlock(&fs_info->buffer_lock); radix_tree_preload_end(); if (ret == -EEXIST) { @@ -4998,7 +4992,7 @@ static int release_extent_buffer(struct extent_buffer *eb) spin_lock(&fs_info->buffer_lock); radix_tree_delete(&fs_info->buffer_radix, - eb->start >> PAGE_CACHE_SHIFT); + eb->start >> PAGE_SHIFT); spin_unlock(&fs_info->buffer_lock); } else { spin_unlock(&eb->refs_lock); @@ -5172,8 +5166,8 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, if (start) { WARN_ON(start < eb->start); - start_i = (start >> PAGE_CACHE_SHIFT) - - (eb->start >> PAGE_CACHE_SHIFT); + start_i = (start >> PAGE_SHIFT) - + (eb->start >> PAGE_SHIFT); } else { start_i = 0; } @@ -5256,18 +5250,18 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, struct page *page; char *kaddr; char *dst = (char *)dstv; - size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); - unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1); + unsigned long i = (start_offset + start) >> PAGE_SHIFT; WARN_ON(start > eb->len); WARN_ON(start + len > eb->start + eb->len); - offset = (start_offset + start) & 
(PAGE_CACHE_SIZE - 1); + offset = (start_offset + start) & (PAGE_SIZE - 1); while (len > 0) { page = eb->pages[i]; - cur = min(len, (PAGE_CACHE_SIZE - offset)); + cur = min(len, (PAGE_SIZE - offset)); kaddr = page_address(page); memcpy(dst, kaddr + offset, cur); @@ -5287,19 +5281,19 @@ int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dstv, struct page *page; char *kaddr; char __user *dst = (char __user *)dstv; - size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); - unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1); + unsigned long i = (start_offset + start) >> PAGE_SHIFT; int ret = 0; WARN_ON(start > eb->len); WARN_ON(start + len > eb->start + eb->len); - offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1); + offset = (start_offset + start) & (PAGE_SIZE - 1); while (len > 0) { page = eb->pages[i]; - cur = min(len, (PAGE_CACHE_SIZE - offset)); + cur = min(len, (PAGE_SIZE - offset)); kaddr = page_address(page); if (copy_to_user(dst, kaddr + offset, cur)) { ret = -EFAULT; @@ -5320,13 +5314,13 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, unsigned long *map_start, unsigned long *map_len) { - size_t offset = start & (PAGE_CACHE_SIZE - 1); + size_t offset = start & (PAGE_SIZE - 1); char *kaddr; struct page *p; - size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); - unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1); + unsigned long i = (start_offset + start) >> PAGE_SHIFT; unsigned long end_i = (start_offset + start + min_len - 1) >> - PAGE_CACHE_SHIFT; + PAGE_SHIFT; if (i != end_i) return -EINVAL; @@ -5336,7 +5330,7 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, *map_start = 0; } else { offset = 0; - *map_start = ((u64)i << PAGE_CACHE_SHIFT) - start_offset; + *map_start = ((u64)i << PAGE_SHIFT) - start_offset; } if (start + 
min_len > eb->len) { @@ -5349,7 +5343,7 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, p = eb->pages[i]; kaddr = page_address(p); *map = kaddr + offset; - *map_len = PAGE_CACHE_SIZE - offset; + *map_len = PAGE_SIZE - offset; return 0; } @@ -5362,19 +5356,19 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, struct page *page; char *kaddr; char *ptr = (char *)ptrv; - size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); - unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1); + unsigned long i = (start_offset + start) >> PAGE_SHIFT; int ret = 0; WARN_ON(start > eb->len); WARN_ON(start + len > eb->start + eb->len); - offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1); + offset = (start_offset + start) & (PAGE_SIZE - 1); while (len > 0) { page = eb->pages[i]; - cur = min(len, (PAGE_CACHE_SIZE - offset)); + cur = min(len, (PAGE_SIZE - offset)); kaddr = page_address(page); ret = memcmp(ptr, kaddr + offset, cur); @@ -5397,19 +5391,19 @@ void write_extent_buffer(struct extent_buffer *eb, const void *srcv, struct page *page; char *kaddr; char *src = (char *)srcv; - size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); - unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1); + unsigned long i = (start_offset + start) >> PAGE_SHIFT; WARN_ON(start > eb->len); WARN_ON(start + len > eb->start + eb->len); - offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1); + offset = (start_offset + start) & (PAGE_SIZE - 1); while (len > 0) { page = eb->pages[i]; WARN_ON(!PageUptodate(page)); - cur = min(len, PAGE_CACHE_SIZE - offset); + cur = min(len, PAGE_SIZE - offset); kaddr = page_address(page); memcpy(kaddr + offset, src, cur); @@ -5427,19 +5421,19 @@ void memset_extent_buffer(struct extent_buffer *eb, char c, size_t offset; struct page *page; char *kaddr; - size_t start_offset = 
eb->start & ((u64)PAGE_CACHE_SIZE - 1); - unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1); + unsigned long i = (start_offset + start) >> PAGE_SHIFT; WARN_ON(start > eb->len); WARN_ON(start + len > eb->start + eb->len); - offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1); + offset = (start_offset + start) & (PAGE_SIZE - 1); while (len > 0) { page = eb->pages[i]; WARN_ON(!PageUptodate(page)); - cur = min(len, PAGE_CACHE_SIZE - offset); + cur = min(len, PAGE_SIZE - offset); kaddr = page_address(page); memset(kaddr + offset, c, cur); @@ -5458,19 +5452,19 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, size_t offset; struct page *page; char *kaddr; - size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); - unsigned long i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT; + size_t start_offset = dst->start & ((u64)PAGE_SIZE - 1); + unsigned long i = (start_offset + dst_offset) >> PAGE_SHIFT; WARN_ON(src->len != dst_len); offset = (start_offset + dst_offset) & - (PAGE_CACHE_SIZE - 1); + (PAGE_SIZE - 1); while (len > 0) { page = dst->pages[i]; WARN_ON(!PageUptodate(page)); - cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset)); + cur = min(len, (unsigned long)(PAGE_SIZE - offset)); kaddr = page_address(page); read_extent_buffer(src, kaddr + offset, src_offset, cur); @@ -5512,7 +5506,7 @@ static inline void eb_bitmap_offset(struct extent_buffer *eb, unsigned long *page_index, size_t *page_offset) { - size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); + size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1); size_t byte_offset = BIT_BYTE(nr); size_t offset; @@ -5523,8 +5517,8 @@ static inline void eb_bitmap_offset(struct extent_buffer *eb, */ offset = start_offset + start + byte_offset; - *page_index = offset >> PAGE_CACHE_SHIFT; - *page_offset = offset & (PAGE_CACHE_SIZE - 1); + *page_index = offset >> PAGE_SHIFT; + *page_offset = offset & 
(PAGE_SIZE - 1); } /** @@ -5576,7 +5570,7 @@ void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start, len -= bits_to_set; bits_to_set = BITS_PER_BYTE; mask_to_set = ~0U; - if (++offset >= PAGE_CACHE_SIZE && len > 0) { + if (++offset >= PAGE_SIZE && len > 0) { offset = 0; page = eb->pages[++i]; WARN_ON(!PageUptodate(page)); @@ -5618,7 +5612,7 @@ void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start, len -= bits_to_clear; bits_to_clear = BITS_PER_BYTE; mask_to_clear = ~0U; - if (++offset >= PAGE_CACHE_SIZE && len > 0) { + if (++offset >= PAGE_SIZE && len > 0) { offset = 0; page = eb->pages[++i]; WARN_ON(!PageUptodate(page)); @@ -5665,7 +5659,7 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, size_t cur; size_t dst_off_in_page; size_t src_off_in_page; - size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); + size_t start_offset = dst->start & ((u64)PAGE_SIZE - 1); unsigned long dst_i; unsigned long src_i; @@ -5684,17 +5678,17 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, while (len > 0) { dst_off_in_page = (start_offset + dst_offset) & - (PAGE_CACHE_SIZE - 1); + (PAGE_SIZE - 1); src_off_in_page = (start_offset + src_offset) & - (PAGE_CACHE_SIZE - 1); + (PAGE_SIZE - 1); - dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT; - src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT; + dst_i = (start_offset + dst_offset) >> PAGE_SHIFT; + src_i = (start_offset + src_offset) >> PAGE_SHIFT; - cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - + cur = min(len, (unsigned long)(PAGE_SIZE - src_off_in_page)); cur = min_t(unsigned long, cur, - (unsigned long)(PAGE_CACHE_SIZE - dst_off_in_page)); + (unsigned long)(PAGE_SIZE - dst_off_in_page)); copy_pages(dst->pages[dst_i], dst->pages[src_i], dst_off_in_page, src_off_in_page, cur); @@ -5713,7 +5707,7 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, size_t src_off_in_page; 
unsigned long dst_end = dst_offset + len - 1; unsigned long src_end = src_offset + len - 1; - size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); + size_t start_offset = dst->start & ((u64)PAGE_SIZE - 1); unsigned long dst_i; unsigned long src_i; @@ -5732,13 +5726,13 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, return; } while (len > 0) { - dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT; - src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT; + dst_i = (start_offset + dst_end) >> PAGE_SHIFT; + src_i = (start_offset + src_end) >> PAGE_SHIFT; dst_off_in_page = (start_offset + dst_end) & - (PAGE_CACHE_SIZE - 1); + (PAGE_SIZE - 1); src_off_in_page = (start_offset + src_end) & - (PAGE_CACHE_SIZE - 1); + (PAGE_SIZE - 1); cur = min_t(unsigned long, len, src_off_in_page + 1); cur = min(cur, dst_off_in_page + 1); diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 880d5292e..b5e0ade90 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -61,6 +61,7 @@ struct extent_state; struct btrfs_root; struct btrfs_io_bio; +struct io_failure_record; typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw, struct bio *bio, int mirror_num, @@ -111,8 +112,7 @@ struct extent_state { atomic_t refs; unsigned state; - /* for use by the FS */ - u64 private; + struct io_failure_record *failrec; #ifdef CONFIG_BTRFS_DEBUG struct list_head leak_list; @@ -120,7 +120,7 @@ struct extent_state { }; #define INLINE_EXTENT_BUFFER_PAGES 16 -#define MAX_INLINE_EXTENT_BUFFER_SIZE (INLINE_EXTENT_BUFFER_PAGES * PAGE_CACHE_SIZE) +#define MAX_INLINE_EXTENT_BUFFER_SIZE (INLINE_EXTENT_BUFFER_PAGES * PAGE_SIZE) struct extent_buffer { u64 start; unsigned long len; @@ -342,7 +342,6 @@ int extent_readpages(struct extent_io_tree *tree, get_extent_t get_extent); int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, __u64 start, __u64 len, get_extent_t *get_extent); -int get_state_private(struct extent_io_tree 
*tree, u64 start, u64 *private); void set_page_extent_mapped(struct page *page); struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, @@ -366,8 +365,8 @@ void wait_on_extent_buffer_writeback(struct extent_buffer *eb); static inline unsigned long num_extent_pages(u64 start, u64 len) { - return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) - - (start >> PAGE_CACHE_SHIFT); + return ((start + len + PAGE_SIZE - 1) >> PAGE_SHIFT) - + (start >> PAGE_SHIFT); } static inline void extent_buffer_get(struct extent_buffer *eb) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 84fb56d5c..318b048eb 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -4,6 +4,7 @@ #include <linux/hardirq.h> #include "ctree.h" #include "extent_map.h" +#include "compression.h" static struct kmem_cache *extent_map_cache; @@ -20,8 +21,7 @@ int __init extent_map_init(void) void extent_map_exit(void) { - if (extent_map_cache) - kmem_cache_destroy(extent_map_cache); + kmem_cache_destroy(extent_map_cache); } /** @@ -62,7 +62,7 @@ struct extent_map *alloc_extent_map(void) /** * free_extent_map - drop reference count of an extent_map - * @em: extent map beeing releasead + * @em: extent map being releasead * * Drops the reference out on @em by one and free the structure * if the reference count hits zero. @@ -422,7 +422,7 @@ struct extent_map *search_extent_mapping(struct extent_map_tree *tree, /** * remove_extent_mapping - removes an extent_map from the extent tree * @tree: extent tree to remove from - * @em: extent map beeing removed + * @em: extent map being removed * * Removes @em from @tree. 
No reference counts are dropped, and no checks * are done to see if the range is in use diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index a67e1c828..7a7d6e253 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -25,13 +25,14 @@ #include "transaction.h" #include "volumes.h" #include "print-tree.h" +#include "compression.h" #define __MAX_CSUM_ITEMS(r, size) ((unsigned long)(((BTRFS_LEAF_DATA_SIZE(r) - \ sizeof(struct btrfs_item) * 2) / \ size) - 1)) #define MAX_CSUM_ITEMS(r, size) (min_t(u32, __MAX_CSUM_ITEMS(r, size), \ - PAGE_CACHE_SIZE)) + PAGE_SIZE)) #define MAX_ORDERED_SUM_BYTES(r) ((PAGE_SIZE - \ sizeof(struct btrfs_ordered_sum)) / \ @@ -172,6 +173,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, u64 item_start_offset = 0; u64 item_last_offset = 0; u64 disk_bytenr; + u64 page_bytes_left; u32 diff; int nblocks; int bio_index = 0; @@ -201,7 +203,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, csum = (u8 *)dst; } - if (bio->bi_iter.bi_size > PAGE_CACHE_SIZE * 8) + if (bio->bi_iter.bi_size > PAGE_SIZE * 8) path->reada = READA_FORWARD; WARN_ON(bio->bi_vcnt <= 0); @@ -220,6 +222,8 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, disk_bytenr = (u64)bio->bi_iter.bi_sector << 9; if (dio) offset = logical_offset; + + page_bytes_left = bvec->bv_len; while (bio_index < bio->bi_vcnt) { if (!dio) offset = page_offset(bvec->bv_page) + bvec->bv_offset; @@ -243,7 +247,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, if (BTRFS_I(inode)->root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID) { set_extent_bits(io_tree, offset, - offset + bvec->bv_len - 1, + offset + root->sectorsize - 1, EXTENT_NODATASUM, GFP_NOFS); } else { btrfs_info(BTRFS_I(inode)->root->fs_info, @@ -281,13 +285,29 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, found: csum += count * csum_size; nblocks -= count; - bio_index += count; + while (count--) { - disk_bytenr += bvec->bv_len; - offset += 
bvec->bv_len; - bvec++; + disk_bytenr += root->sectorsize; + offset += root->sectorsize; + page_bytes_left -= root->sectorsize; + if (!page_bytes_left) { + bio_index++; + /* + * make sure we're still inside the + * bio before we update page_bytes_left + */ + if (bio_index >= bio->bi_vcnt) { + WARN_ON_ONCE(count); + goto done; + } + bvec++; + page_bytes_left = bvec->bv_len; + } + } } + +done: btrfs_free_path(path); return 0; } @@ -432,6 +452,8 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, struct bio_vec *bvec = bio->bi_io_vec; int bio_index = 0; int index; + int nr_sectors; + int i; unsigned long total_bytes = 0; unsigned long this_sum_bytes = 0; u64 offset; @@ -459,41 +481,56 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, if (!contig) offset = page_offset(bvec->bv_page) + bvec->bv_offset; - if (offset >= ordered->file_offset + ordered->len || - offset < ordered->file_offset) { - unsigned long bytes_left; - sums->len = this_sum_bytes; - this_sum_bytes = 0; - btrfs_add_ordered_sum(inode, ordered, sums); - btrfs_put_ordered_extent(ordered); + data = kmap_atomic(bvec->bv_page); - bytes_left = bio->bi_iter.bi_size - total_bytes; + nr_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info, + bvec->bv_len + root->sectorsize + - 1); + + for (i = 0; i < nr_sectors; i++) { + if (offset >= ordered->file_offset + ordered->len || + offset < ordered->file_offset) { + unsigned long bytes_left; + + kunmap_atomic(data); + sums->len = this_sum_bytes; + this_sum_bytes = 0; + btrfs_add_ordered_sum(inode, ordered, sums); + btrfs_put_ordered_extent(ordered); + + bytes_left = bio->bi_iter.bi_size - total_bytes; + + sums = kzalloc(btrfs_ordered_sum_size(root, bytes_left), + GFP_NOFS); + BUG_ON(!sums); /* -ENOMEM */ + sums->len = bytes_left; + ordered = btrfs_lookup_ordered_extent(inode, + offset); + ASSERT(ordered); /* Logic error */ + sums->bytenr = ((u64)bio->bi_iter.bi_sector << 9) + + total_bytes; + index = 0; + + data = 
kmap_atomic(bvec->bv_page); + } - sums = kzalloc(btrfs_ordered_sum_size(root, bytes_left), - GFP_NOFS); - BUG_ON(!sums); /* -ENOMEM */ - sums->len = bytes_left; - ordered = btrfs_lookup_ordered_extent(inode, offset); - BUG_ON(!ordered); /* Logic error */ - sums->bytenr = ((u64)bio->bi_iter.bi_sector << 9) + - total_bytes; - index = 0; + sums->sums[index] = ~(u32)0; + sums->sums[index] + = btrfs_csum_data(data + bvec->bv_offset + + (i * root->sectorsize), + sums->sums[index], + root->sectorsize); + btrfs_csum_final(sums->sums[index], + (char *)(sums->sums + index)); + index++; + offset += root->sectorsize; + this_sum_bytes += root->sectorsize; + total_bytes += root->sectorsize; } - data = kmap_atomic(bvec->bv_page); - sums->sums[index] = ~(u32)0; - sums->sums[index] = btrfs_csum_data(data + bvec->bv_offset, - sums->sums[index], - bvec->bv_len); kunmap_atomic(data); - btrfs_csum_final(sums->sums[index], - (char *)(sums->sums + index)); bio_index++; - index++; - total_bytes += bvec->bv_len; - this_sum_bytes += bvec->bv_len; - offset += bvec->bv_len; bvec++; } this_sum_bytes = 0; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 9a30ca640..af5c7fa22 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -41,6 +41,7 @@ #include "locking.h" #include "volumes.h" #include "qgroup.h" +#include "compression.h" static struct kmem_cache *btrfs_inode_defrag_cachep; /* @@ -413,11 +414,11 @@ static noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes, size_t copied = 0; size_t total_copied = 0; int pg = 0; - int offset = pos & (PAGE_CACHE_SIZE - 1); + int offset = pos & (PAGE_SIZE - 1); while (write_bytes > 0) { size_t count = min_t(size_t, - PAGE_CACHE_SIZE - offset, write_bytes); + PAGE_SIZE - offset, write_bytes); struct page *page = prepared_pages[pg]; /* * Copy data from userspace to the current page @@ -447,7 +448,7 @@ static noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes, if (unlikely(copied == 0)) break; - if (copied < PAGE_CACHE_SIZE - 
offset) { + if (copied < PAGE_SIZE - offset) { offset += copied; } else { pg++; @@ -472,7 +473,7 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages) */ ClearPageChecked(pages[i]); unlock_page(pages[i]); - page_cache_release(pages[i]); + put_page(pages[i]); } } @@ -498,7 +499,7 @@ int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode, loff_t isize = i_size_read(inode); start_pos = pos & ~((u64)root->sectorsize - 1); - num_bytes = ALIGN(write_bytes + pos - start_pos, root->sectorsize); + num_bytes = round_up(write_bytes + pos - start_pos, root->sectorsize); end_of_last_block = start_pos + num_bytes - 1; err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, @@ -1296,7 +1297,7 @@ static int prepare_uptodate_page(struct inode *inode, { int ret = 0; - if (((pos & (PAGE_CACHE_SIZE - 1)) || force_uptodate) && + if (((pos & (PAGE_SIZE - 1)) || force_uptodate) && !PageUptodate(page)) { ret = btrfs_readpage(NULL, page); if (ret) @@ -1322,7 +1323,7 @@ static noinline int prepare_pages(struct inode *inode, struct page **pages, size_t write_bytes, bool force_uptodate) { int i; - unsigned long index = pos >> PAGE_CACHE_SHIFT; + unsigned long index = pos >> PAGE_SHIFT; gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); int err = 0; int faili; @@ -1344,7 +1345,7 @@ again: err = prepare_uptodate_page(inode, pages[i], pos + write_bytes, false); if (err) { - page_cache_release(pages[i]); + put_page(pages[i]); if (err == -EAGAIN) { err = 0; goto again; @@ -1359,7 +1360,7 @@ again: fail: while (faili >= 0) { unlock_page(pages[faili]); - page_cache_release(pages[faili]); + put_page(pages[faili]); faili--; } return err; @@ -1379,16 +1380,19 @@ fail: static noinline int lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages, size_t num_pages, loff_t pos, + size_t write_bytes, u64 *lockstart, u64 *lockend, struct extent_state **cached_state) { + struct btrfs_root *root = BTRFS_I(inode)->root; u64 start_pos; u64 last_pos; 
int i; int ret = 0; - start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1); - last_pos = start_pos + ((u64)num_pages << PAGE_CACHE_SHIFT) - 1; + start_pos = round_down(pos, root->sectorsize); + last_pos = start_pos + + round_up(pos + write_bytes - start_pos, root->sectorsize) - 1; if (start_pos < inode->i_size) { struct btrfs_ordered_extent *ordered; @@ -1404,7 +1408,7 @@ lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages, cached_state, GFP_NOFS); for (i = 0; i < num_pages; i++) { unlock_page(pages[i]); - page_cache_release(pages[i]); + put_page(pages[i]); } btrfs_start_ordered_extent(inode, ordered, 1); btrfs_put_ordered_extent(ordered); @@ -1493,8 +1497,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, bool force_page_uptodate = false; bool need_unlock; - nrptrs = min(DIV_ROUND_UP(iov_iter_count(i), PAGE_CACHE_SIZE), - PAGE_CACHE_SIZE / (sizeof(struct page *))); + nrptrs = min(DIV_ROUND_UP(iov_iter_count(i), PAGE_SIZE), + PAGE_SIZE / (sizeof(struct page *))); nrptrs = min(nrptrs, current->nr_dirtied_pause - current->nr_dirtied); nrptrs = max(nrptrs, 8); pages = kmalloc_array(nrptrs, sizeof(struct page *), GFP_KERNEL); @@ -1502,15 +1506,18 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, return -ENOMEM; while (iov_iter_count(i) > 0) { - size_t offset = pos & (PAGE_CACHE_SIZE - 1); + size_t offset = pos & (PAGE_SIZE - 1); + size_t sector_offset; size_t write_bytes = min(iov_iter_count(i), - nrptrs * (size_t)PAGE_CACHE_SIZE - + nrptrs * (size_t)PAGE_SIZE - offset); size_t num_pages = DIV_ROUND_UP(write_bytes + offset, - PAGE_CACHE_SIZE); + PAGE_SIZE); size_t reserve_bytes; size_t dirty_pages; size_t copied; + size_t dirty_sectors; + size_t num_sectors; WARN_ON(num_pages > nrptrs); @@ -1523,29 +1530,29 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, break; } - reserve_bytes = num_pages << PAGE_CACHE_SHIFT; + sector_offset = pos & (root->sectorsize - 1); + reserve_bytes = round_up(write_bytes 
+ sector_offset, + root->sectorsize); - if (BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | - BTRFS_INODE_PREALLOC)) { - ret = check_can_nocow(inode, pos, &write_bytes); - if (ret < 0) - break; - if (ret > 0) { - /* - * For nodata cow case, no need to reserve - * data space. - */ - only_release_metadata = true; - /* - * our prealloc extent may be smaller than - * write_bytes, so scale down. - */ - num_pages = DIV_ROUND_UP(write_bytes + offset, - PAGE_CACHE_SIZE); - reserve_bytes = num_pages << PAGE_CACHE_SHIFT; - goto reserve_metadata; - } + if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | + BTRFS_INODE_PREALLOC)) && + check_can_nocow(inode, pos, &write_bytes) > 0) { + /* + * For nodata cow case, no need to reserve + * data space. + */ + only_release_metadata = true; + /* + * our prealloc extent may be smaller than + * write_bytes, so scale down. + */ + num_pages = DIV_ROUND_UP(write_bytes + offset, + PAGE_SIZE); + reserve_bytes = round_up(write_bytes + sector_offset, + root->sectorsize); + goto reserve_metadata; } + ret = btrfs_check_data_free_space(inode, pos, write_bytes); if (ret < 0) break; @@ -1576,8 +1583,8 @@ again: break; ret = lock_and_cleanup_extent_if_need(inode, pages, num_pages, - pos, &lockstart, &lockend, - &cached_state); + pos, write_bytes, &lockstart, + &lockend, &cached_state); if (ret < 0) { if (ret == -EAGAIN) goto again; @@ -1589,6 +1596,13 @@ again: copied = btrfs_copy_from_user(pos, write_bytes, pages, i); + num_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info, + reserve_bytes); + dirty_sectors = round_up(copied + sector_offset, + root->sectorsize); + dirty_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info, + dirty_sectors); + /* * if we have trouble faulting in the pages, fall * back to one page at a time @@ -1598,23 +1612,30 @@ again: if (copied == 0) { force_page_uptodate = true; + dirty_sectors = 0; dirty_pages = 0; } else { force_page_uptodate = false; dirty_pages = DIV_ROUND_UP(copied + offset, - PAGE_CACHE_SIZE); + PAGE_SIZE); } /* * If 
we had a short copy we need to release the excess delaloc * bytes we reserved. We need to increment outstanding_extents - * because btrfs_delalloc_release_space will decrement it, but + * because btrfs_delalloc_release_space and + * btrfs_delalloc_release_metadata will decrement it, but * we still have an outstanding extent for the chunk we actually * managed to copy. */ - if (num_pages > dirty_pages) { - release_bytes = (num_pages - dirty_pages) << - PAGE_CACHE_SHIFT; + if (num_sectors > dirty_sectors) { + /* + * we round down because we don't want to count + * any partial blocks actually sent through the + * IO machines + */ + release_bytes = round_down(release_bytes - copied, + root->sectorsize); if (copied > 0) { spin_lock(&BTRFS_I(inode)->lock); BTRFS_I(inode)->outstanding_extents++; @@ -1627,13 +1648,14 @@ again: u64 __pos; __pos = round_down(pos, root->sectorsize) + - (dirty_pages << PAGE_CACHE_SHIFT); + (dirty_pages << PAGE_SHIFT); btrfs_delalloc_release_space(inode, __pos, release_bytes); } } - release_bytes = dirty_pages << PAGE_CACHE_SHIFT; + release_bytes = round_up(copied + sector_offset, + root->sectorsize); if (copied > 0) ret = btrfs_dirty_pages(root, inode, pages, @@ -1654,8 +1676,7 @@ again: if (only_release_metadata && copied > 0) { lockstart = round_down(pos, root->sectorsize); - lockend = lockstart + - (dirty_pages << PAGE_CACHE_SHIFT) - 1; + lockend = round_up(pos + copied, root->sectorsize) - 1; set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, EXTENT_NORESERVE, NULL, @@ -1668,7 +1689,7 @@ again: cond_resched(); balance_dirty_pages_ratelimited(inode->i_mapping); - if (dirty_pages < (root->nodesize >> PAGE_CACHE_SHIFT) + 1) + if (dirty_pages < (root->nodesize >> PAGE_SHIFT) + 1) btrfs_btree_balance_dirty(root); pos += copied; @@ -1724,8 +1745,8 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb, goto out; written += written_buffered; iocb->ki_pos = pos + written_buffered; - invalidate_mapping_pages(file->f_mapping, pos >> 
PAGE_CACHE_SHIFT, - endbyte >> PAGE_CACHE_SHIFT); + invalidate_mapping_pages(file->f_mapping, pos >> PAGE_SHIFT, + endbyte >> PAGE_SHIFT); out: return written ? written : err; } @@ -1761,6 +1782,8 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, ssize_t err; loff_t pos; size_t count; + loff_t oldsize; + int clean_page = 0; inode_lock(inode); err = generic_write_checks(iocb, from); @@ -1799,14 +1822,17 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, pos = iocb->ki_pos; count = iov_iter_count(from); start_pos = round_down(pos, root->sectorsize); - if (start_pos > i_size_read(inode)) { + oldsize = i_size_read(inode); + if (start_pos > oldsize) { /* Expand hole size to cover write data, preventing empty gap */ end_pos = round_up(pos + count, root->sectorsize); - err = btrfs_cont_expand(inode, i_size_read(inode), end_pos); + err = btrfs_cont_expand(inode, oldsize, end_pos); if (err) { inode_unlock(inode); goto out; } + if (start_pos > round_up(oldsize, root->sectorsize)) + clean_page = 1; } if (sync) @@ -1818,6 +1844,9 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, num_written = __btrfs_buffered_write(file, from, pos); if (num_written > 0) iocb->ki_pos = pos + num_written; + if (clean_page) + pagecache_isize_extended(inode, oldsize, + i_size_read(inode)); } inode_unlock(inode); @@ -1825,7 +1854,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, /* * We also have to set last_sub_trans to the current log transid, * otherwise subsequent syncs to a file that's been synced in this - * transaction will appear to have already occured. + * transaction will appear to have already occurred. 
*/ spin_lock(&BTRFS_I(inode)->lock); BTRFS_I(inode)->last_sub_trans = root->log_transid; @@ -1996,10 +2025,11 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) */ smp_mb(); if (btrfs_inode_in_log(inode, root->fs_info->generation) || - (BTRFS_I(inode)->last_trans <= - root->fs_info->last_trans_committed && - (full_sync || - !btrfs_have_ordered_extents_in_range(inode, start, len)))) { + (full_sync && BTRFS_I(inode)->last_trans <= + root->fs_info->last_trans_committed) || + (!btrfs_have_ordered_extents_in_range(inode, start, len) && + BTRFS_I(inode)->last_trans + <= root->fs_info->last_trans_committed)) { /* * We'v had everything committed since the last time we were * modified so clear this flag in case it was set for whatever @@ -2293,10 +2323,10 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) int ret = 0; int err = 0; unsigned int rsv_count; - bool same_page; + bool same_block; bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES); u64 ino_size; - bool truncated_page = false; + bool truncated_block = false; bool updated_inode = false; ret = btrfs_wait_ordered_range(inode, offset, len); @@ -2304,7 +2334,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) return ret; inode_lock(inode); - ino_size = round_up(inode->i_size, PAGE_CACHE_SIZE); + ino_size = round_up(inode->i_size, root->sectorsize); ret = find_first_non_hole(inode, &offset, &len); if (ret < 0) goto out_only_mutex; @@ -2317,31 +2347,30 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) lockstart = round_up(offset, BTRFS_I(inode)->root->sectorsize); lockend = round_down(offset + len, BTRFS_I(inode)->root->sectorsize) - 1; - same_page = ((offset >> PAGE_CACHE_SHIFT) == - ((offset + len - 1) >> PAGE_CACHE_SHIFT)); - + same_block = (BTRFS_BYTES_TO_BLKS(root->fs_info, offset)) + == (BTRFS_BYTES_TO_BLKS(root->fs_info, offset + len - 1)); /* - * We needn't truncate any page which is beyond 
the end of the file + * We needn't truncate any block which is beyond the end of the file * because we are sure there is no data there. */ /* - * Only do this if we are in the same page and we aren't doing the - * entire page. + * Only do this if we are in the same block and we aren't doing the + * entire block. */ - if (same_page && len < PAGE_CACHE_SIZE) { + if (same_block && len < root->sectorsize) { if (offset < ino_size) { - truncated_page = true; - ret = btrfs_truncate_page(inode, offset, len, 0); + truncated_block = true; + ret = btrfs_truncate_block(inode, offset, len, 0); } else { ret = 0; } goto out_only_mutex; } - /* zero back part of the first page */ + /* zero back part of the first block */ if (offset < ino_size) { - truncated_page = true; - ret = btrfs_truncate_page(inode, offset, 0, 0); + truncated_block = true; + ret = btrfs_truncate_block(inode, offset, 0, 0); if (ret) { inode_unlock(inode); return ret; @@ -2376,9 +2405,10 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) if (!ret) { /* zero the front end of the last page */ if (tail_start + tail_len < ino_size) { - truncated_page = true; - ret = btrfs_truncate_page(inode, - tail_start + tail_len, 0, 1); + truncated_block = true; + ret = btrfs_truncate_block(inode, + tail_start + tail_len, + 0, 1); if (ret) goto out_only_mutex; } @@ -2544,7 +2574,7 @@ out_trans: goto out_free; inode_inc_iversion(inode); - inode->i_mtime = inode->i_ctime = CURRENT_TIME; + inode->i_mtime = inode->i_ctime = current_fs_time(inode->i_sb); trans->block_rsv = &root->fs_info->trans_block_rsv; ret = btrfs_update_inode(trans, root, inode); @@ -2558,7 +2588,7 @@ out: unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, &cached_state, GFP_NOFS); out_only_mutex: - if (!updated_inode && truncated_page && !ret && !err) { + if (!updated_inode && truncated_block && !ret && !err) { /* * If we only end up zeroing part of a page, we still need to * update the inode item, so that all the time 
fields are @@ -2611,7 +2641,7 @@ static int add_falloc_range(struct list_head *head, u64 start, u64 len) return 0; } insert: - range = kmalloc(sizeof(*range), GFP_NOFS); + range = kmalloc(sizeof(*range), GFP_KERNEL); if (!range) return -ENOMEM; range->start = start; @@ -2659,9 +2689,12 @@ static long btrfs_fallocate(struct file *file, int mode, return ret; inode_lock(inode); - ret = inode_newsize_ok(inode, alloc_end); - if (ret) - goto out; + + if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size) { + ret = inode_newsize_ok(inode, offset + len); + if (ret) + goto out; + } /* * TODO: Move these two operations after we have checked @@ -2678,10 +2711,10 @@ static long btrfs_fallocate(struct file *file, int mode, } else if (offset + len > inode->i_size) { /* * If we are fallocating from the end of the file onward we - * need to zero out the end of the page if i_size lands in the - * middle of a page. + * need to zero out the end of the block if i_size lands in the + * middle of a block. 
*/ - ret = btrfs_truncate_page(inode, inode->i_size, 0, 0); + ret = btrfs_truncate_block(inode, inode->i_size, 0, 0); if (ret) goto out; } @@ -2712,7 +2745,7 @@ static long btrfs_fallocate(struct file *file, int mode, btrfs_put_ordered_extent(ordered); unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, - &cached_state, GFP_NOFS); + &cached_state, GFP_KERNEL); /* * we can't wait on the range with the transaction * running or with the extent lock held @@ -2794,7 +2827,7 @@ static long btrfs_fallocate(struct file *file, int mode, if (IS_ERR(trans)) { ret = PTR_ERR(trans); } else { - inode->i_ctime = CURRENT_TIME; + inode->i_ctime = current_fs_time(inode->i_sb); i_size_write(inode, actual_end); btrfs_ordered_update_i_size(inode, actual_end, NULL); ret = btrfs_update_inode(trans, root, inode); @@ -2806,7 +2839,7 @@ static long btrfs_fallocate(struct file *file, int mode, } out_unlock: unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, - &cached_state, GFP_NOFS); + &cached_state, GFP_KERNEL); out: /* * As we waited the extent range, the data_rsv_map must be empty @@ -2930,7 +2963,7 @@ const struct file_operations btrfs_file_operations = { .fallocate = btrfs_fallocate, .unlocked_ioctl = btrfs_ioctl, #ifdef CONFIG_COMPAT - .compat_ioctl = btrfs_ioctl, + .compat_ioctl = btrfs_compat_ioctl, #endif .copy_file_range = btrfs_copy_file_range, .clone_file_range = btrfs_clone_file_range, @@ -2939,8 +2972,7 @@ const struct file_operations btrfs_file_operations = { void btrfs_auto_defrag_exit(void) { - if (btrfs_inode_defrag_cachep) - kmem_cache_destroy(btrfs_inode_defrag_cachep); + kmem_cache_destroy(btrfs_inode_defrag_cachep); } int btrfs_auto_defrag_init(void) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 8f835bfa1..5e6062c26 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -29,7 +29,7 @@ #include "inode-map.h" #include "volumes.h" -#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) 
+#define BITS_PER_BITMAP (PAGE_SIZE * 8) #define MAX_CACHE_BYTES_PER_GIG SZ_32K struct btrfs_trim_range { @@ -295,7 +295,7 @@ static int readahead_cache(struct inode *inode) return -ENOMEM; file_ra_state_init(ra, inode->i_mapping); - last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; + last_index = (i_size_read(inode) - 1) >> PAGE_SHIFT; page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index); @@ -310,14 +310,14 @@ static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode, int num_pages; int check_crcs = 0; - num_pages = DIV_ROUND_UP(i_size_read(inode), PAGE_CACHE_SIZE); + num_pages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); if (btrfs_ino(inode) != BTRFS_FREE_INO_OBJECTID) check_crcs = 1; /* Make sure we can fit our crcs into the first page */ if (write && check_crcs && - (num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) + (num_pages * sizeof(u32)) >= PAGE_SIZE) return -ENOSPC; memset(io_ctl, 0, sizeof(struct btrfs_io_ctl)); @@ -354,9 +354,9 @@ static void io_ctl_map_page(struct btrfs_io_ctl *io_ctl, int clear) io_ctl->page = io_ctl->pages[io_ctl->index++]; io_ctl->cur = page_address(io_ctl->page); io_ctl->orig = io_ctl->cur; - io_ctl->size = PAGE_CACHE_SIZE; + io_ctl->size = PAGE_SIZE; if (clear) - memset(io_ctl->cur, 0, PAGE_CACHE_SIZE); + memset(io_ctl->cur, 0, PAGE_SIZE); } static void io_ctl_drop_pages(struct btrfs_io_ctl *io_ctl) @@ -369,7 +369,7 @@ static void io_ctl_drop_pages(struct btrfs_io_ctl *io_ctl) if (io_ctl->pages[i]) { ClearPageChecked(io_ctl->pages[i]); unlock_page(io_ctl->pages[i]); - page_cache_release(io_ctl->pages[i]); + put_page(io_ctl->pages[i]); } } } @@ -475,7 +475,7 @@ static void io_ctl_set_crc(struct btrfs_io_ctl *io_ctl, int index) offset = sizeof(u32) * io_ctl->num_pages; crc = btrfs_csum_data(io_ctl->orig + offset, crc, - PAGE_CACHE_SIZE - offset); + PAGE_SIZE - offset); btrfs_csum_final(crc, (char *)&crc); io_ctl_unmap_page(io_ctl); tmp = page_address(io_ctl->pages[0]); @@ -503,7 +503,7 @@ static 
int io_ctl_check_crc(struct btrfs_io_ctl *io_ctl, int index) io_ctl_map_page(io_ctl, 0); crc = btrfs_csum_data(io_ctl->orig + offset, crc, - PAGE_CACHE_SIZE - offset); + PAGE_SIZE - offset); btrfs_csum_final(crc, (char *)&crc); if (val != crc) { btrfs_err_rl(io_ctl->root->fs_info, @@ -561,7 +561,7 @@ static int io_ctl_add_bitmap(struct btrfs_io_ctl *io_ctl, void *bitmap) io_ctl_map_page(io_ctl, 0); } - memcpy(io_ctl->cur, bitmap, PAGE_CACHE_SIZE); + memcpy(io_ctl->cur, bitmap, PAGE_SIZE); io_ctl_set_crc(io_ctl, io_ctl->index - 1); if (io_ctl->index < io_ctl->num_pages) io_ctl_map_page(io_ctl, 0); @@ -621,7 +621,7 @@ static int io_ctl_read_bitmap(struct btrfs_io_ctl *io_ctl, if (ret) return ret; - memcpy(entry->bitmap, io_ctl->cur, PAGE_CACHE_SIZE); + memcpy(entry->bitmap, io_ctl->cur, PAGE_SIZE); io_ctl_unmap_page(io_ctl); return 0; @@ -775,7 +775,7 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, } else { ASSERT(num_bitmaps); num_bitmaps--; - e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); + e->bitmap = kzalloc(PAGE_SIZE, GFP_NOFS); if (!e->bitmap) { kmem_cache_free( btrfs_free_space_cachep, e); @@ -1660,7 +1660,7 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as * we add more bitmaps. 
*/ - bitmap_bytes = (ctl->total_bitmaps + 1) * PAGE_CACHE_SIZE; + bitmap_bytes = (ctl->total_bitmaps + 1) * PAGE_SIZE; if (bitmap_bytes >= max_bytes) { ctl->extents_thresh = 0; @@ -2111,7 +2111,7 @@ new_bitmap: } /* allocate the bitmap */ - info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); + info->bitmap = kzalloc(PAGE_SIZE, GFP_NOFS); spin_lock(&ctl->tree_lock); if (!info->bitmap) { ret = -ENOMEM; @@ -3580,7 +3580,7 @@ again: } if (!map) { - map = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); + map = kzalloc(PAGE_SIZE, GFP_NOFS); if (!map) { kmem_cache_free(btrfs_free_space_cachep, info); return -ENOMEM; diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index e50316c4a..70107f7c9 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -283,7 +283,7 @@ void btrfs_unpin_free_ino(struct btrfs_root *root) } #define INIT_THRESHOLD ((SZ_32K / 2) / sizeof(struct btrfs_free_space)) -#define INODES_PER_BITMAP (PAGE_CACHE_SIZE * 8) +#define INODES_PER_BITMAP (PAGE_SIZE * 8) /* * The goal is to keep the memory used by the free_ino tree won't @@ -317,7 +317,7 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) } ctl->extents_thresh = (max_bitmaps - ctl->total_bitmaps) * - PAGE_CACHE_SIZE / sizeof(*info); + PAGE_SIZE / sizeof(*info); } /* @@ -481,12 +481,12 @@ again: spin_lock(&ctl->tree_lock); prealloc = sizeof(struct btrfs_free_space) * ctl->free_extents; - prealloc = ALIGN(prealloc, PAGE_CACHE_SIZE); - prealloc += ctl->total_bitmaps * PAGE_CACHE_SIZE; + prealloc = ALIGN(prealloc, PAGE_SIZE); + prealloc += ctl->total_bitmaps * PAGE_SIZE; spin_unlock(&ctl->tree_lock); /* Just to make sure we have enough space */ - prealloc += 8 * PAGE_CACHE_SIZE; + prealloc += 8 * PAGE_SIZE; ret = btrfs_delalloc_reserve_space(inode, 0, prealloc); if (ret) @@ -556,6 +556,9 @@ int btrfs_find_free_objectid(struct btrfs_root *root, u64 *objectid) mutex_lock(&root->objectid_mutex); if (unlikely(root->highest_objectid >= BTRFS_LAST_FREE_OBJECTID)) { + 
btrfs_warn(root->fs_info, + "the objectid of root %llu reaches its highest value", + root->root_key.objectid); ret = -ENOSPC; goto out; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d96f5cf38..167fc3d49 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -194,7 +194,7 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, while (compressed_size > 0) { cpage = compressed_pages[i]; cur_size = min_t(unsigned long, compressed_size, - PAGE_CACHE_SIZE); + PAGE_SIZE); kaddr = kmap_atomic(cpage); write_extent_buffer(leaf, kaddr, ptr, cur_size); @@ -208,13 +208,13 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, compress_type); } else { page = find_get_page(inode->i_mapping, - start >> PAGE_CACHE_SHIFT); + start >> PAGE_SHIFT); btrfs_set_file_extent_compression(leaf, ei, 0); kaddr = kmap_atomic(page); - offset = start & (PAGE_CACHE_SIZE - 1); + offset = start & (PAGE_SIZE - 1); write_extent_buffer(leaf, kaddr + offset, ptr, size); kunmap_atomic(kaddr); - page_cache_release(page); + put_page(page); } btrfs_mark_buffer_dirty(leaf); btrfs_release_path(path); @@ -263,7 +263,7 @@ static noinline int cow_file_range_inline(struct btrfs_root *root, data_len = compressed_size; if (start > 0 || - actual_end > PAGE_CACHE_SIZE || + actual_end > root->sectorsize || data_len > BTRFS_MAX_INLINE_DATA_SIZE(root) || (!compressed_size && (actual_end & (root->sectorsize - 1)) == 0) || @@ -322,7 +322,7 @@ out: * And at reserve time, it's always aligned to page size, so * just free one page here. 
*/ - btrfs_qgroup_free_data(inode, 0, PAGE_CACHE_SIZE); + btrfs_qgroup_free_data(inode, 0, PAGE_SIZE); btrfs_free_path(path); btrfs_end_transaction(trans, root); return ret; @@ -435,8 +435,8 @@ static noinline void compress_file_range(struct inode *inode, actual_end = min_t(u64, isize, end + 1); again: will_compress = 0; - nr_pages = (end >> PAGE_CACHE_SHIFT) - (start >> PAGE_CACHE_SHIFT) + 1; - nr_pages = min_t(unsigned long, nr_pages, SZ_128K / PAGE_CACHE_SIZE); + nr_pages = (end >> PAGE_SHIFT) - (start >> PAGE_SHIFT) + 1; + nr_pages = min_t(unsigned long, nr_pages, SZ_128K / PAGE_SIZE); /* * we don't want to send crud past the end of i_size through @@ -514,7 +514,7 @@ again: if (!ret) { unsigned long offset = total_compressed & - (PAGE_CACHE_SIZE - 1); + (PAGE_SIZE - 1); struct page *page = pages[nr_pages_ret - 1]; char *kaddr; @@ -524,7 +524,7 @@ again: if (offset) { kaddr = kmap_atomic(page); memset(kaddr + offset, 0, - PAGE_CACHE_SIZE - offset); + PAGE_SIZE - offset); kunmap_atomic(kaddr); } will_compress = 1; @@ -580,7 +580,7 @@ cont: * one last check to make sure the compression is really a * win, compare the page count read with the blocks on disk */ - total_in = ALIGN(total_in, PAGE_CACHE_SIZE); + total_in = ALIGN(total_in, PAGE_SIZE); if (total_compressed >= total_in) { will_compress = 0; } else { @@ -594,7 +594,7 @@ cont: */ for (i = 0; i < nr_pages_ret; i++) { WARN_ON(pages[i]->mapping); - page_cache_release(pages[i]); + put_page(pages[i]); } kfree(pages); pages = NULL; @@ -650,7 +650,7 @@ cleanup_and_bail_uncompressed: free_pages_out: for (i = 0; i < nr_pages_ret; i++) { WARN_ON(pages[i]->mapping); - page_cache_release(pages[i]); + put_page(pages[i]); } kfree(pages); } @@ -664,7 +664,7 @@ static void free_async_extent_pages(struct async_extent *async_extent) for (i = 0; i < async_extent->nr_pages; i++) { WARN_ON(async_extent->pages[i]->mapping); - page_cache_release(async_extent->pages[i]); + put_page(async_extent->pages[i]); } 
kfree(async_extent->pages); async_extent->nr_pages = 0; @@ -966,7 +966,7 @@ static noinline int cow_file_range(struct inode *inode, PAGE_END_WRITEBACK); *nr_written = *nr_written + - (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE; + (end - start + PAGE_SIZE) / PAGE_SIZE; *page_started = 1; goto out; } else if (ret < 0) { @@ -1106,8 +1106,8 @@ static noinline void async_cow_submit(struct btrfs_work *work) async_cow = container_of(work, struct async_cow, work); root = async_cow->root; - nr_pages = (async_cow->end - async_cow->start + PAGE_CACHE_SIZE) >> - PAGE_CACHE_SHIFT; + nr_pages = (async_cow->end - async_cow->start + PAGE_SIZE) >> + PAGE_SHIFT; /* * atomic_sub_return implies a barrier for waitqueue_active @@ -1164,8 +1164,8 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, async_cow_start, async_cow_submit, async_cow_free); - nr_pages = (cur_end - start + PAGE_CACHE_SIZE) >> - PAGE_CACHE_SHIFT; + nr_pages = (cur_end - start + PAGE_SIZE) >> + PAGE_SHIFT; atomic_add(nr_pages, &root->fs_info->async_delalloc_pages); btrfs_queue_work(root->fs_info->delalloc_workers, @@ -1960,7 +1960,7 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans, int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, struct extent_state **cached_state) { - WARN_ON((end & (PAGE_CACHE_SIZE - 1)) == 0); + WARN_ON((end & (PAGE_SIZE - 1)) == 0); return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end, cached_state, GFP_NOFS); } @@ -1993,7 +1993,7 @@ again: inode = page->mapping->host; page_start = page_offset(page); - page_end = page_offset(page) + PAGE_CACHE_SIZE - 1; + page_end = page_offset(page) + PAGE_SIZE - 1; lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, &cached_state); @@ -2002,7 +2002,8 @@ again: if (PagePrivate2(page)) goto out; - ordered = btrfs_lookup_ordered_extent(inode, page_start); + ordered = btrfs_lookup_ordered_range(inode, page_start, + PAGE_SIZE); if (ordered) { 
unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end, &cached_state, GFP_NOFS); @@ -2013,7 +2014,7 @@ again: } ret = btrfs_delalloc_reserve_space(inode, page_start, - PAGE_CACHE_SIZE); + PAGE_SIZE); if (ret) { mapping_set_error(page->mapping, ret); end_extent_writepage(page, ret, page_start, page_end); @@ -2029,7 +2030,7 @@ out: &cached_state, GFP_NOFS); out_page: unlock_page(page); - page_cache_release(page); + put_page(page); kfree(fixup); } @@ -2062,7 +2063,7 @@ static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end) return -EAGAIN; SetPageChecked(page); - page_cache_get(page); + get_page(page); btrfs_init_work(&fixup->work, btrfs_fixup_helper, btrfs_writepage_fixup_worker, NULL, NULL); fixup->page = page; @@ -4013,7 +4014,8 @@ err: btrfs_i_size_write(dir, dir->i_size - name_len * 2); inode_inc_iversion(inode); inode_inc_iversion(dir); - inode->i_ctime = dir->i_mtime = dir->i_ctime = CURRENT_TIME; + inode->i_ctime = dir->i_mtime = + dir->i_ctime = current_fs_time(inode->i_sb); ret = btrfs_update_inode(trans, root, dir); out: return ret; @@ -4156,7 +4158,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, btrfs_i_size_write(dir, dir->i_size - name_len * 2); inode_inc_iversion(dir); - dir->i_mtime = dir->i_ctime = CURRENT_TIME; + dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb); ret = btrfs_update_inode_fallback(trans, root, dir); if (ret) btrfs_abort_transaction(trans, root, ret); @@ -4211,11 +4213,20 @@ static int truncate_space_check(struct btrfs_trans_handle *trans, { int ret; + /* + * This is only used to apply pressure to the enospc system, we don't + * intend to use this reservation at all. 
+ */ bytes_deleted = btrfs_csum_bytes_to_leaves(root, bytes_deleted); + bytes_deleted *= root->nodesize; ret = btrfs_block_rsv_add(root, &root->fs_info->trans_block_rsv, bytes_deleted, BTRFS_RESERVE_NO_FLUSH); - if (!ret) + if (!ret) { + trace_btrfs_space_reservation(root->fs_info, "transaction", + trans->transid, + bytes_deleted, 1); trans->bytes_reserved += bytes_deleted; + } return ret; } @@ -4236,7 +4247,7 @@ static int truncate_inline_extent(struct inode *inode, if (btrfs_file_extent_compression(leaf, fi) != BTRFS_COMPRESS_NONE) { loff_t offset = new_size; - loff_t page_end = ALIGN(offset, PAGE_CACHE_SIZE); + loff_t page_end = ALIGN(offset, PAGE_SIZE); /* * Zero out the remaining of the last page of our inline extent, @@ -4248,7 +4259,8 @@ static int truncate_inline_extent(struct inode *inode, * read the extent item from disk (data not in the page cache). */ btrfs_release_path(path); - return btrfs_truncate_page(inode, offset, page_end - offset, 0); + return btrfs_truncate_block(inode, offset, page_end - offset, + 0); } btrfs_set_file_extent_ram_bytes(leaf, fi, size); @@ -4601,17 +4613,17 @@ error: } /* - * btrfs_truncate_page - read, zero a chunk and write a page + * btrfs_truncate_block - read, zero a chunk and write a block * @inode - inode that we're zeroing * @from - the offset to start zeroing * @len - the length to zero, 0 to zero the entire range respective to the * offset * @front - zero up to the offset instead of from the offset on * - * This will find the page for the "from" offset and cow the page and zero the + * This will find the block for the "from" offset and cow the block and zero the * part we want to zero. This is used with truncate and hole punching. 
*/ -int btrfs_truncate_page(struct inode *inode, loff_t from, loff_t len, +int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len, int front) { struct address_space *mapping = inode->i_mapping; @@ -4621,19 +4633,20 @@ int btrfs_truncate_page(struct inode *inode, loff_t from, loff_t len, struct extent_state *cached_state = NULL; char *kaddr; u32 blocksize = root->sectorsize; - pgoff_t index = from >> PAGE_CACHE_SHIFT; - unsigned offset = from & (PAGE_CACHE_SIZE-1); + pgoff_t index = from >> PAGE_SHIFT; + unsigned offset = from & (blocksize - 1); struct page *page; gfp_t mask = btrfs_alloc_write_mask(mapping); int ret = 0; - u64 page_start; - u64 page_end; + u64 block_start; + u64 block_end; if ((offset & (blocksize - 1)) == 0 && (!len || ((len & (blocksize - 1)) == 0))) goto out; + ret = btrfs_delalloc_reserve_space(inode, - round_down(from, PAGE_CACHE_SIZE), PAGE_CACHE_SIZE); + round_down(from, blocksize), blocksize); if (ret) goto out; @@ -4641,21 +4654,21 @@ again: page = find_or_create_page(mapping, index, mask); if (!page) { btrfs_delalloc_release_space(inode, - round_down(from, PAGE_CACHE_SIZE), - PAGE_CACHE_SIZE); + round_down(from, blocksize), + blocksize); ret = -ENOMEM; goto out; } - page_start = page_offset(page); - page_end = page_start + PAGE_CACHE_SIZE - 1; + block_start = round_down(from, blocksize); + block_end = block_start + blocksize - 1; if (!PageUptodate(page)) { ret = btrfs_readpage(NULL, page); lock_page(page); if (page->mapping != mapping) { unlock_page(page); - page_cache_release(page); + put_page(page); goto again; } if (!PageUptodate(page)) { @@ -4665,55 +4678,57 @@ again: } wait_on_page_writeback(page); - lock_extent_bits(io_tree, page_start, page_end, &cached_state); + lock_extent_bits(io_tree, block_start, block_end, &cached_state); set_page_extent_mapped(page); - ordered = btrfs_lookup_ordered_extent(inode, page_start); + ordered = btrfs_lookup_ordered_extent(inode, block_start); if (ordered) { - 
unlock_extent_cached(io_tree, page_start, page_end, + unlock_extent_cached(io_tree, block_start, block_end, &cached_state, GFP_NOFS); unlock_page(page); - page_cache_release(page); + put_page(page); btrfs_start_ordered_extent(inode, ordered, 1); btrfs_put_ordered_extent(ordered); goto again; } - clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end, + clear_extent_bit(&BTRFS_I(inode)->io_tree, block_start, block_end, EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0, &cached_state, GFP_NOFS); - ret = btrfs_set_extent_delalloc(inode, page_start, page_end, + ret = btrfs_set_extent_delalloc(inode, block_start, block_end, &cached_state); if (ret) { - unlock_extent_cached(io_tree, page_start, page_end, + unlock_extent_cached(io_tree, block_start, block_end, &cached_state, GFP_NOFS); goto out_unlock; } - if (offset != PAGE_CACHE_SIZE) { + if (offset != blocksize) { if (!len) - len = PAGE_CACHE_SIZE - offset; + len = blocksize - offset; kaddr = kmap(page); if (front) - memset(kaddr, 0, offset); + memset(kaddr + (block_start - page_offset(page)), + 0, offset); else - memset(kaddr + offset, 0, len); + memset(kaddr + (block_start - page_offset(page)) + offset, + 0, len); flush_dcache_page(page); kunmap(page); } ClearPageChecked(page); set_page_dirty(page); - unlock_extent_cached(io_tree, page_start, page_end, &cached_state, + unlock_extent_cached(io_tree, block_start, block_end, &cached_state, GFP_NOFS); out_unlock: if (ret) - btrfs_delalloc_release_space(inode, page_start, - PAGE_CACHE_SIZE); + btrfs_delalloc_release_space(inode, block_start, + blocksize); unlock_page(page); - page_cache_release(page); + put_page(page); out: return ret; } @@ -4782,11 +4797,11 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) int err = 0; /* - * If our size started in the middle of a page we need to zero out the - * rest of the page before we expand the i_size, otherwise we could + * If our size started in the middle of a block we 
need to zero out the + * rest of the block before we expand the i_size, otherwise we could * expose stale data. */ - err = btrfs_truncate_page(inode, oldsize, 0, 0); + err = btrfs_truncate_block(inode, oldsize, 0, 0); if (err) return err; @@ -4895,7 +4910,6 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr) } if (newsize > oldsize) { - truncate_pagecache(inode, newsize); /* * Don't do an expanding truncate while snapshoting is ongoing. * This is to ensure the snapshot captures a fully consistent @@ -4918,6 +4932,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr) i_size_write(inode, newsize); btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL); + pagecache_isize_extended(inode, oldsize, newsize); ret = btrfs_update_inode(trans, root, inode); btrfs_end_write_no_snapshoting(root); btrfs_end_transaction(trans, root); @@ -5588,7 +5603,7 @@ static struct inode *new_simple_dir(struct super_block *s, inode->i_op = &btrfs_dir_ro_inode_operations; inode->i_fop = &simple_dir_operations; inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO; - inode->i_mtime = CURRENT_TIME; + inode->i_mtime = current_fs_time(inode->i_sb); inode->i_atime = inode->i_mtime; inode->i_ctime = inode->i_mtime; BTRFS_I(inode)->i_otime = inode->i_mtime; @@ -5790,7 +5805,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) if (name_len <= sizeof(tmp_name)) { name_ptr = tmp_name; } else { - name_ptr = kmalloc(name_len, GFP_NOFS); + name_ptr = kmalloc(name_len, GFP_KERNEL); if (!name_ptr) { ret = -ENOMEM; goto err; @@ -6172,7 +6187,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, inode_init_owner(inode, dir, mode); inode_set_bytes(inode, 0); - inode->i_mtime = CURRENT_TIME; + inode->i_mtime = current_fs_time(inode->i_sb); inode->i_atime = inode->i_mtime; inode->i_ctime = inode->i_mtime; BTRFS_I(inode)->i_otime = inode->i_mtime; @@ -6285,7 +6300,8 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, 
btrfs_i_size_write(parent_inode, parent_inode->i_size + name_len * 2); inode_inc_iversion(parent_inode); - parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; + parent_inode->i_mtime = parent_inode->i_ctime = + current_fs_time(parent_inode->i_sb); ret = btrfs_update_inode(trans, root, parent_inode); if (ret) btrfs_abort_transaction(trans, root, ret); @@ -6503,7 +6519,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, BTRFS_I(inode)->dir_index = 0ULL; inc_nlink(inode); inode_inc_iversion(inode); - inode->i_ctime = CURRENT_TIME; + inode->i_ctime = current_fs_time(inode->i_sb); ihold(inode); set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags); @@ -6701,7 +6717,7 @@ static noinline int uncompress_inline(struct btrfs_path *path, read_extent_buffer(leaf, tmp, ptr, inline_size); - max_size = min_t(unsigned long, PAGE_CACHE_SIZE, max_size); + max_size = min_t(unsigned long, PAGE_SIZE, max_size); ret = btrfs_decompress(compress_type, tmp, page, extent_offset, inline_size, max_size); kfree(tmp); @@ -6863,8 +6879,8 @@ next: size = btrfs_file_extent_inline_len(leaf, path->slots[0], item); extent_offset = page_offset(page) + pg_offset - extent_start; - copy_size = min_t(u64, PAGE_CACHE_SIZE - pg_offset, - size - extent_offset); + copy_size = min_t(u64, PAGE_SIZE - pg_offset, + size - extent_offset); em->start = extent_start + extent_offset; em->len = ALIGN(copy_size, root->sectorsize); em->orig_block_len = em->len; @@ -6883,9 +6899,9 @@ next: map = kmap(page); read_extent_buffer(leaf, map + pg_offset, ptr, copy_size); - if (pg_offset + copy_size < PAGE_CACHE_SIZE) { + if (pg_offset + copy_size < PAGE_SIZE) { memset(map + pg_offset + copy_size, 0, - PAGE_CACHE_SIZE - pg_offset - + PAGE_SIZE - pg_offset - copy_size); } kunmap(page); @@ -7320,12 +7336,12 @@ bool btrfs_page_exists_in_range(struct inode *inode, loff_t start, loff_t end) int start_idx; int end_idx; - start_idx = start >> PAGE_CACHE_SHIFT; + start_idx = start >> 
PAGE_SHIFT; /* * end is the last byte in the last page. end == start is legal */ - end_idx = end >> PAGE_CACHE_SHIFT; + end_idx = end >> PAGE_SHIFT; rcu_read_lock(); @@ -7366,7 +7382,7 @@ bool btrfs_page_exists_in_range(struct inode *inode, loff_t start, loff_t end) * include/linux/pagemap.h for details. */ if (unlikely(page != *pagep)) { - page_cache_release(page); + put_page(page); page = NULL; } } @@ -7374,7 +7390,7 @@ bool btrfs_page_exists_in_range(struct inode *inode, loff_t start, loff_t end) if (page) { if (page->index <= end_idx) found = true; - page_cache_release(page); + put_page(page); } rcu_read_unlock(); @@ -7414,7 +7430,26 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend, cached_state, GFP_NOFS); if (ordered) { - btrfs_start_ordered_extent(inode, ordered, 1); + /* + * If we are doing a DIO read and the ordered extent we + * found is for a buffered write, we can not wait for it + * to complete and retry, because if we do so we can + * deadlock with concurrent buffered writes on page + * locks. This happens only if our DIO read covers more + * than one extent map, if at this point has already + * created an ordered extent for a previous extent map + * and locked its range in the inode's io tree, and a + * concurrent write against that previous extent map's + * range and this range started (we unlock the ranges + * in the io tree only when the bios complete and + * buffered writes always lock pages before attempting + * to lock range in the io tree). + */ + if (writing || + test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags)) + btrfs_start_ordered_extent(inode, ordered, 1); + else + ret = -ENOTBLK; btrfs_put_ordered_extent(ordered); } else { /* @@ -7431,9 +7466,11 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend, * that page. 
*/ ret = -ENOTBLK; - break; } + if (ret) + break; + cond_resched(); } @@ -7764,9 +7801,9 @@ static int btrfs_check_dio_repairable(struct inode *inode, } static int dio_read_error(struct inode *inode, struct bio *failed_bio, - struct page *page, u64 start, u64 end, - int failed_mirror, bio_end_io_t *repair_endio, - void *repair_arg) + struct page *page, unsigned int pgoff, + u64 start, u64 end, int failed_mirror, + bio_end_io_t *repair_endio, void *repair_arg) { struct io_failure_record *failrec; struct bio *bio; @@ -7787,7 +7824,9 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio, return -EIO; } - if (failed_bio->bi_vcnt > 1) + if ((failed_bio->bi_vcnt > 1) + || (failed_bio->bi_io_vec->bv_len + > BTRFS_I(inode)->root->sectorsize)) read_mode = READ_SYNC | REQ_FAILFAST_DEV; else read_mode = READ_SYNC; @@ -7795,7 +7834,7 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio, isector = start - btrfs_io_bio(failed_bio)->logical; isector >>= inode->i_sb->s_blocksize_bits; bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page, - 0, isector, repair_endio, repair_arg); + pgoff, isector, repair_endio, repair_arg); if (!bio) { free_io_failure(inode, failrec); return -EIO; @@ -7825,12 +7864,17 @@ struct btrfs_retry_complete { static void btrfs_retry_endio_nocsum(struct bio *bio) { struct btrfs_retry_complete *done = bio->bi_private; + struct inode *inode; struct bio_vec *bvec; int i; if (bio->bi_error) goto end; + ASSERT(bio->bi_vcnt == 1); + inode = bio->bi_io_vec->bv_page->mapping->host; + ASSERT(bio->bi_io_vec->bv_len == BTRFS_I(inode)->root->sectorsize); + done->uptodate = 1; bio_for_each_segment_all(bvec, bio, i) clean_io_failure(done->inode, done->start, bvec->bv_page, 0); @@ -7842,25 +7886,35 @@ end: static int __btrfs_correct_data_nocsum(struct inode *inode, struct btrfs_io_bio *io_bio) { + struct btrfs_fs_info *fs_info; struct bio_vec *bvec; struct btrfs_retry_complete done; u64 start; + unsigned int pgoff; + u32 
sectorsize; + int nr_sectors; int i; int ret; + fs_info = BTRFS_I(inode)->root->fs_info; + sectorsize = BTRFS_I(inode)->root->sectorsize; + start = io_bio->logical; done.inode = inode; bio_for_each_segment_all(bvec, &io_bio->bio, i) { -try_again: + nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec->bv_len); + pgoff = bvec->bv_offset; + +next_block_or_try_again: done.uptodate = 0; done.start = start; init_completion(&done.done); - ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page, start, - start + bvec->bv_len - 1, - io_bio->mirror_num, - btrfs_retry_endio_nocsum, &done); + ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page, + pgoff, start, start + sectorsize - 1, + io_bio->mirror_num, + btrfs_retry_endio_nocsum, &done); if (ret) return ret; @@ -7868,10 +7922,15 @@ try_again: if (!done.uptodate) { /* We might have another mirror, so try again */ - goto try_again; + goto next_block_or_try_again; } - start += bvec->bv_len; + start += sectorsize; + + if (nr_sectors--) { + pgoff += sectorsize; + goto next_block_or_try_again; + } } return 0; @@ -7881,7 +7940,9 @@ static void btrfs_retry_endio(struct bio *bio) { struct btrfs_retry_complete *done = bio->bi_private; struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); + struct inode *inode; struct bio_vec *bvec; + u64 start; int uptodate; int ret; int i; @@ -7890,13 +7951,20 @@ static void btrfs_retry_endio(struct bio *bio) goto end; uptodate = 1; + + start = done->start; + + ASSERT(bio->bi_vcnt == 1); + inode = bio->bi_io_vec->bv_page->mapping->host; + ASSERT(bio->bi_io_vec->bv_len == BTRFS_I(inode)->root->sectorsize); + bio_for_each_segment_all(bvec, bio, i) { ret = __readpage_endio_check(done->inode, io_bio, i, - bvec->bv_page, 0, - done->start, bvec->bv_len); + bvec->bv_page, bvec->bv_offset, + done->start, bvec->bv_len); if (!ret) clean_io_failure(done->inode, done->start, - bvec->bv_page, 0); + bvec->bv_page, bvec->bv_offset); else uptodate = 0; } @@ -7910,20 +7978,34 @@ end: static int 
__btrfs_subio_endio_read(struct inode *inode, struct btrfs_io_bio *io_bio, int err) { + struct btrfs_fs_info *fs_info; struct bio_vec *bvec; struct btrfs_retry_complete done; u64 start; u64 offset = 0; + u32 sectorsize; + int nr_sectors; + unsigned int pgoff; + int csum_pos; int i; int ret; + fs_info = BTRFS_I(inode)->root->fs_info; + sectorsize = BTRFS_I(inode)->root->sectorsize; + err = 0; start = io_bio->logical; done.inode = inode; bio_for_each_segment_all(bvec, &io_bio->bio, i) { - ret = __readpage_endio_check(inode, io_bio, i, bvec->bv_page, - 0, start, bvec->bv_len); + nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec->bv_len); + + pgoff = bvec->bv_offset; +next_block: + csum_pos = BTRFS_BYTES_TO_BLKS(fs_info, offset); + ret = __readpage_endio_check(inode, io_bio, csum_pos, + bvec->bv_page, pgoff, start, + sectorsize); if (likely(!ret)) goto next; try_again: @@ -7931,10 +8013,10 @@ try_again: done.start = start; init_completion(&done.done); - ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page, start, - start + bvec->bv_len - 1, - io_bio->mirror_num, - btrfs_retry_endio, &done); + ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page, + pgoff, start, start + sectorsize - 1, + io_bio->mirror_num, + btrfs_retry_endio, &done); if (ret) { err = ret; goto next; @@ -7947,8 +8029,15 @@ try_again: goto try_again; } next: - offset += bvec->bv_len; - start += bvec->bv_len; + offset += sectorsize; + start += sectorsize; + + ASSERT(nr_sectors); + + if (--nr_sectors) { + pgoff += sectorsize; + goto next_block; + } } return err; @@ -8202,9 +8291,11 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, u64 file_offset = dip->logical_offset; u64 submit_len = 0; u64 map_length; - int nr_pages = 0; - int ret; + u32 blocksize = root->sectorsize; int async_submit = 0; + int nr_sectors; + int ret; + int i; map_length = orig_bio->bi_iter.bi_size; ret = btrfs_map_block(root->fs_info, rw, start_sector << 9, @@ -8234,9 +8325,12 @@ static int 
btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, atomic_inc(&dip->pending_bios); while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) { - if (map_length < submit_len + bvec->bv_len || - bio_add_page(bio, bvec->bv_page, bvec->bv_len, - bvec->bv_offset) < bvec->bv_len) { + nr_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info, bvec->bv_len); + i = 0; +next_block: + if (unlikely(map_length < submit_len + blocksize || + bio_add_page(bio, bvec->bv_page, blocksize, + bvec->bv_offset + (i * blocksize)) < blocksize)) { /* * inc the count before we submit the bio so * we know the end IO handler won't happen before @@ -8257,7 +8351,6 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, file_offset += submit_len; submit_len = 0; - nr_pages = 0; bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS); @@ -8275,9 +8368,14 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, bio_put(bio); goto out_err; } + + goto next_block; } else { - submit_len += bvec->bv_len; - nr_pages++; + submit_len += blocksize; + if (--nr_sectors) { + i++; + goto next_block; + } bvec++; } } @@ -8621,7 +8719,7 @@ static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags) if (ret == 1) { ClearPagePrivate(page); set_page_private(page, 0); - page_cache_release(page); + put_page(page); } return ret; } @@ -8641,7 +8739,9 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset, struct btrfs_ordered_extent *ordered; struct extent_state *cached_state = NULL; u64 page_start = page_offset(page); - u64 page_end = page_start + PAGE_CACHE_SIZE - 1; + u64 page_end = page_start + PAGE_SIZE - 1; + u64 start; + u64 end; int inode_evicting = inode->i_state & I_FREEING; /* @@ -8661,14 +8761,18 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset, if (!inode_evicting) lock_extent_bits(tree, page_start, page_end, &cached_state); - ordered = btrfs_lookup_ordered_extent(inode, page_start); +again: + 
start = page_start; + ordered = btrfs_lookup_ordered_range(inode, start, + page_end - start + 1); if (ordered) { + end = min(page_end, ordered->file_offset + ordered->len - 1); /* * IO on this page will never be started, so we need * to account for any ordered extents now */ if (!inode_evicting) - clear_extent_bit(tree, page_start, page_end, + clear_extent_bit(tree, start, end, EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_LOCKED | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1, 0, &cached_state, @@ -8685,22 +8789,26 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset, spin_lock_irq(&tree->lock); set_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags); - new_len = page_start - ordered->file_offset; + new_len = start - ordered->file_offset; if (new_len < ordered->truncated_len) ordered->truncated_len = new_len; spin_unlock_irq(&tree->lock); if (btrfs_dec_test_ordered_pending(inode, &ordered, - page_start, - PAGE_CACHE_SIZE, 1)) + start, + end - start + 1, 1)) btrfs_finish_ordered_io(ordered); } btrfs_put_ordered_extent(ordered); if (!inode_evicting) { cached_state = NULL; - lock_extent_bits(tree, page_start, page_end, + lock_extent_bits(tree, start, end, &cached_state); } + + start = end + 1; + if (start < page_end) + goto again; } /* @@ -8714,7 +8822,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset, * 2) Not written to disk * This means the reserved space should be freed here. 
*/ - btrfs_qgroup_free_data(inode, page_start, PAGE_CACHE_SIZE); + btrfs_qgroup_free_data(inode, page_start, PAGE_SIZE); if (!inode_evicting) { clear_extent_bit(tree, page_start, page_end, EXTENT_LOCKED | EXTENT_DIRTY | @@ -8729,7 +8837,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset, if (PagePrivate(page)) { ClearPagePrivate(page); set_page_private(page, 0); - page_cache_release(page); + put_page(page); } } @@ -8761,15 +8869,28 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) loff_t size; int ret; int reserved = 0; + u64 reserved_space; u64 page_start; u64 page_end; + u64 end; + + reserved_space = PAGE_SIZE; sb_start_pagefault(inode->i_sb); page_start = page_offset(page); - page_end = page_start + PAGE_CACHE_SIZE - 1; + page_end = page_start + PAGE_SIZE - 1; + end = page_end; + /* + * Reserving delalloc space after obtaining the page lock can lead to + * deadlock. For example, if a dirty page is locked by this function + * and the call to btrfs_delalloc_reserve_space() ends up triggering + * dirty page write out, then the btrfs_writepage() function could + * end up waiting indefinitely to get a lock on the page currently + * being processed by btrfs_page_mkwrite() function. + */ ret = btrfs_delalloc_reserve_space(inode, page_start, - PAGE_CACHE_SIZE); + reserved_space); if (!ret) { ret = file_update_time(vma->vm_file); reserved = 1; @@ -8803,7 +8924,7 @@ again: * we can't set the delalloc bits if there are pending ordered * extents. 
Drop our locks and wait for them to finish */ - ordered = btrfs_lookup_ordered_extent(inode, page_start); + ordered = btrfs_lookup_ordered_range(inode, page_start, page_end); if (ordered) { unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS); @@ -8813,6 +8934,18 @@ again: goto again; } + if (page->index == ((size - 1) >> PAGE_SHIFT)) { + reserved_space = round_up(size - page_start, root->sectorsize); + if (reserved_space < PAGE_SIZE) { + end = page_start + reserved_space - 1; + spin_lock(&BTRFS_I(inode)->lock); + BTRFS_I(inode)->outstanding_extents++; + spin_unlock(&BTRFS_I(inode)->lock); + btrfs_delalloc_release_space(inode, page_start, + PAGE_SIZE - reserved_space); + } + } + /* * XXX - page_mkwrite gets called every time the page is dirtied, even * if it was already dirty, so for space accounting reasons we need to @@ -8820,12 +8953,12 @@ again: * is probably a better way to do this, but for now keep consistent with * prepare_pages in the normal write path. */ - clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end, + clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, end, EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0, &cached_state, GFP_NOFS); - ret = btrfs_set_extent_delalloc(inode, page_start, page_end, + ret = btrfs_set_extent_delalloc(inode, page_start, end, &cached_state); if (ret) { unlock_extent_cached(io_tree, page_start, page_end, @@ -8836,14 +8969,14 @@ again: ret = 0; /* page is wholly or partially inside EOF */ - if (page_start + PAGE_CACHE_SIZE > size) - zero_start = size & ~PAGE_CACHE_MASK; + if (page_start + PAGE_SIZE > size) + zero_start = size & ~PAGE_MASK; else - zero_start = PAGE_CACHE_SIZE; + zero_start = PAGE_SIZE; - if (zero_start != PAGE_CACHE_SIZE) { + if (zero_start != PAGE_SIZE) { kaddr = kmap(page); - memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start); + memset(kaddr + zero_start, 0, PAGE_SIZE - zero_start); flush_dcache_page(page); kunmap(page); } @@ 
-8864,7 +8997,7 @@ out_unlock: } unlock_page(page); out: - btrfs_delalloc_release_space(inode, page_start, PAGE_CACHE_SIZE); + btrfs_delalloc_release_space(inode, page_start, reserved_space); out_noreserve: sb_end_pagefault(inode->i_sb); return ret; @@ -9190,16 +9323,11 @@ void btrfs_destroy_cachep(void) * destroy cache. */ rcu_barrier(); - if (btrfs_inode_cachep) - kmem_cache_destroy(btrfs_inode_cachep); - if (btrfs_trans_handle_cachep) - kmem_cache_destroy(btrfs_trans_handle_cachep); - if (btrfs_transaction_cachep) - kmem_cache_destroy(btrfs_transaction_cachep); - if (btrfs_path_cachep) - kmem_cache_destroy(btrfs_path_cachep); - if (btrfs_free_space_cachep) - kmem_cache_destroy(btrfs_free_space_cachep); + kmem_cache_destroy(btrfs_inode_cachep); + kmem_cache_destroy(btrfs_trans_handle_cachep); + kmem_cache_destroy(btrfs_transaction_cachep); + kmem_cache_destroy(btrfs_path_cachep); + kmem_cache_destroy(btrfs_free_space_cachep); } int btrfs_init_cachep(void) @@ -9250,7 +9378,6 @@ static int btrfs_getattr(struct vfsmount *mnt, generic_fillattr(inode, stat); stat->dev = BTRFS_I(inode)->root->anon_dev; - stat->blksize = PAGE_CACHE_SIZE; spin_lock(&BTRFS_I(inode)->lock); delalloc_bytes = BTRFS_I(inode)->delalloc_bytes; @@ -9268,7 +9395,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct btrfs_root *dest = BTRFS_I(new_dir)->root; struct inode *new_inode = d_inode(new_dentry); struct inode *old_inode = d_inode(old_dentry); - struct timespec ctime = CURRENT_TIME; u64 index = 0; u64 root_objectid; int ret; @@ -9365,9 +9491,9 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, inode_inc_iversion(old_dir); inode_inc_iversion(new_dir); inode_inc_iversion(old_inode); - old_dir->i_ctime = old_dir->i_mtime = ctime; - new_dir->i_ctime = new_dir->i_mtime = ctime; - old_inode->i_ctime = ctime; + old_dir->i_ctime = old_dir->i_mtime = + new_dir->i_ctime = new_dir->i_mtime = + old_inode->i_ctime = 
current_fs_time(old_dir->i_sb); if (old_dentry->d_parent != new_dentry->d_parent) btrfs_record_unlink_dir(trans, old_dir, old_inode, 1); @@ -9392,7 +9518,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (new_inode) { inode_inc_iversion(new_inode); - new_inode->i_ctime = CURRENT_TIME; + new_inode->i_ctime = current_fs_time(new_inode->i_sb); if (unlikely(btrfs_ino(new_inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { root_objectid = BTRFS_I(new_inode)->location.objectid; @@ -9870,7 +9996,7 @@ next: *alloc_hint = ins.objectid + ins.offset; inode_inc_iversion(inode); - inode->i_ctime = CURRENT_TIME; + inode->i_ctime = current_fs_time(inode->i_sb); BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; if (!(mode & FALLOC_FL_KEEP_SIZE) && (actual_len > inode->i_size) && @@ -10058,7 +10184,7 @@ static const struct file_operations btrfs_dir_file_operations = { .iterate = btrfs_real_readdir, .unlocked_ioctl = btrfs_ioctl, #ifdef CONFIG_COMPAT - .compat_ioctl = btrfs_ioctl, + .compat_ioctl = btrfs_compat_ioctl, #endif .release = btrfs_release_file, .fsync = btrfs_sync_file, diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 48aee9846..f545f81f6 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -59,6 +59,8 @@ #include "props.h" #include "sysfs.h" #include "qgroup.h" +#include "tree-log.h" +#include "compression.h" #ifdef CONFIG_64BIT /* If we have a 32-bit userspace and 64-bit kernel, then the UAPI @@ -347,7 +349,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) btrfs_update_iflags(inode); inode_inc_iversion(inode); - inode->i_ctime = CURRENT_TIME; + inode->i_ctime = current_fs_time(inode->i_sb); ret = btrfs_update_inode(trans, root, inode); btrfs_end_transaction(trans, root); @@ -443,7 +445,7 @@ static noinline int create_subvol(struct inode *dir, struct btrfs_root *root = BTRFS_I(dir)->root; struct btrfs_root *new_root; struct btrfs_block_rsv block_rsv; - struct timespec cur_time = CURRENT_TIME; + struct timespec 
cur_time = current_fs_time(dir->i_sb); struct inode *inode; int ret; int err; @@ -844,10 +846,6 @@ static noinline int btrfs_mksubvol(struct path *parent, if (IS_ERR(dentry)) goto out_unlock; - error = -EEXIST; - if (d_really_is_positive(dentry)) - goto out_dput; - error = btrfs_may_create(dir, dentry); if (error) goto out_dput; @@ -900,7 +898,7 @@ static int check_defrag_in_cache(struct inode *inode, u64 offset, u32 thresh) u64 end; read_lock(&em_tree->lock); - em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE); + em = lookup_extent_mapping(em_tree, offset, PAGE_SIZE); read_unlock(&em_tree->lock); if (em) { @@ -990,7 +988,7 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start) struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct extent_map *em; - u64 len = PAGE_CACHE_SIZE; + u64 len = PAGE_SIZE; /* * hopefully we have this extent in the tree already, try without @@ -1126,15 +1124,15 @@ static int cluster_pages_for_defrag(struct inode *inode, struct extent_io_tree *tree; gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); - file_end = (isize - 1) >> PAGE_CACHE_SHIFT; + file_end = (isize - 1) >> PAGE_SHIFT; if (!isize || start_index > file_end) return 0; page_cnt = min_t(u64, (u64)num_pages, (u64)file_end - start_index + 1); ret = btrfs_delalloc_reserve_space(inode, - start_index << PAGE_CACHE_SHIFT, - page_cnt << PAGE_CACHE_SHIFT); + start_index << PAGE_SHIFT, + page_cnt << PAGE_SHIFT); if (ret) return ret; i_done = 0; @@ -1150,7 +1148,7 @@ again: break; page_start = page_offset(page); - page_end = page_start + PAGE_CACHE_SIZE - 1; + page_end = page_start + PAGE_SIZE - 1; while (1) { lock_extent_bits(tree, page_start, page_end, &cached_state); @@ -1171,7 +1169,7 @@ again: */ if (page->mapping != inode->i_mapping) { unlock_page(page); - page_cache_release(page); + put_page(page); goto again; } } @@ -1181,7 +1179,7 @@ again: lock_page(page); if 
(!PageUptodate(page)) { unlock_page(page); - page_cache_release(page); + put_page(page); ret = -EIO; break; } @@ -1189,7 +1187,7 @@ again: if (page->mapping != inode->i_mapping) { unlock_page(page); - page_cache_release(page); + put_page(page); goto again; } @@ -1210,7 +1208,7 @@ again: wait_on_page_writeback(pages[i]); page_start = page_offset(pages[0]); - page_end = page_offset(pages[i_done - 1]) + PAGE_CACHE_SIZE; + page_end = page_offset(pages[i_done - 1]) + PAGE_SIZE; lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end - 1, &cached_state); @@ -1224,8 +1222,8 @@ again: BTRFS_I(inode)->outstanding_extents++; spin_unlock(&BTRFS_I(inode)->lock); btrfs_delalloc_release_space(inode, - start_index << PAGE_CACHE_SHIFT, - (page_cnt - i_done) << PAGE_CACHE_SHIFT); + start_index << PAGE_SHIFT, + (page_cnt - i_done) << PAGE_SHIFT); } @@ -1242,17 +1240,17 @@ again: set_page_extent_mapped(pages[i]); set_page_dirty(pages[i]); unlock_page(pages[i]); - page_cache_release(pages[i]); + put_page(pages[i]); } return i_done; out: for (i = 0; i < i_done; i++) { unlock_page(pages[i]); - page_cache_release(pages[i]); + put_page(pages[i]); } btrfs_delalloc_release_space(inode, - start_index << PAGE_CACHE_SHIFT, - page_cnt << PAGE_CACHE_SHIFT); + start_index << PAGE_SHIFT, + page_cnt << PAGE_SHIFT); return ret; } @@ -1275,7 +1273,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, int defrag_count = 0; int compress_type = BTRFS_COMPRESS_ZLIB; u32 extent_thresh = range->extent_thresh; - unsigned long max_cluster = SZ_256K >> PAGE_CACHE_SHIFT; + unsigned long max_cluster = SZ_256K >> PAGE_SHIFT; unsigned long cluster = max_cluster; u64 new_align = ~((u64)SZ_128K - 1); struct page **pages = NULL; @@ -1319,9 +1317,9 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, /* find the last page to defrag */ if (range->start + range->len > range->start) { last_index = min_t(u64, isize - 1, - range->start + range->len - 1) >> PAGE_CACHE_SHIFT; + range->start 
+ range->len - 1) >> PAGE_SHIFT; } else { - last_index = (isize - 1) >> PAGE_CACHE_SHIFT; + last_index = (isize - 1) >> PAGE_SHIFT; } if (newer_than) { @@ -1333,11 +1331,11 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, * we always align our defrag to help keep * the extents in the file evenly spaced */ - i = (newer_off & new_align) >> PAGE_CACHE_SHIFT; + i = (newer_off & new_align) >> PAGE_SHIFT; } else goto out_ra; } else { - i = range->start >> PAGE_CACHE_SHIFT; + i = range->start >> PAGE_SHIFT; } if (!max_to_defrag) max_to_defrag = last_index - i + 1; @@ -1350,7 +1348,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, inode->i_mapping->writeback_index = i; while (i <= last_index && defrag_count < max_to_defrag && - (i < DIV_ROUND_UP(i_size_read(inode), PAGE_CACHE_SIZE))) { + (i < DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE))) { /* * make sure we stop running if someone unmounts * the FS @@ -1364,7 +1362,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, break; } - if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT, + if (!should_defrag_range(inode, (u64)i << PAGE_SHIFT, extent_thresh, &last_len, &skip, &defrag_end, range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) { @@ -1373,14 +1371,14 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, * the should_defrag function tells us how much to skip * bump our counter by the suggested amount */ - next = DIV_ROUND_UP(skip, PAGE_CACHE_SIZE); + next = DIV_ROUND_UP(skip, PAGE_SIZE); i = max(i + 1, next); continue; } if (!newer_than) { - cluster = (PAGE_CACHE_ALIGN(defrag_end) >> - PAGE_CACHE_SHIFT) - i; + cluster = (PAGE_ALIGN(defrag_end) >> + PAGE_SHIFT) - i; cluster = min(cluster, max_cluster); } else { cluster = max_cluster; @@ -1414,20 +1412,20 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, i += ret; newer_off = max(newer_off + 1, - (u64)i << PAGE_CACHE_SHIFT); + (u64)i << PAGE_SHIFT); ret = find_new_extents(root, inode, newer_than, 
&newer_off, SZ_64K); if (!ret) { range->start = newer_off; - i = (newer_off & new_align) >> PAGE_CACHE_SHIFT; + i = (newer_off & new_align) >> PAGE_SHIFT; } else { break; } } else { if (ret > 0) { i += ret; - last_len += ret << PAGE_CACHE_SHIFT; + last_len += ret << PAGE_SHIFT; } else { i++; last_len = 0; @@ -1656,7 +1654,7 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, src_inode = file_inode(src.file); if (src_inode->i_sb != file_inode(file)->i_sb) { - btrfs_info(BTRFS_I(src_inode)->root->fs_info, + btrfs_info(BTRFS_I(file_inode(file))->root->fs_info, "Snapshot src from another FS"); ret = -EXDEV; } else if (!inode_owner_or_capable(src_inode)) { @@ -1724,7 +1722,7 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file, if (vol_args->flags & BTRFS_SUBVOL_RDONLY) readonly = true; if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) { - if (vol_args->size > PAGE_CACHE_SIZE) { + if (vol_args->size > PAGE_SIZE) { ret = -EINVAL; goto free_args; } @@ -2097,8 +2095,6 @@ static noinline int search_ioctl(struct inode *inode, key.offset = (u64)-1; root = btrfs_read_fs_root_no_name(info, &key); if (IS_ERR(root)) { - btrfs_err(info, "could not find root %llu", - sk->tree_id); btrfs_free_path(path); return -ENOENT; } @@ -2476,6 +2472,8 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, trans->block_rsv = &block_rsv; trans->bytes_reserved = block_rsv.size; + btrfs_record_snapshot_destroy(trans, dir); + ret = btrfs_unlink_subvol(trans, root, dir, dest->root_key.objectid, dentry->d_name.name, @@ -2808,12 +2806,12 @@ static struct page *extent_same_get_page(struct inode *inode, pgoff_t index) lock_page(page); if (!PageUptodate(page)) { unlock_page(page); - page_cache_release(page); + put_page(page); return ERR_PTR(-EIO); } if (page->mapping != inode->i_mapping) { unlock_page(page); - page_cache_release(page); + put_page(page); return ERR_PTR(-EAGAIN); } } @@ -2825,7 +2823,7 @@ static int gather_extent_pages(struct inode *inode, 
struct page **pages, int num_pages, u64 off) { int i; - pgoff_t index = off >> PAGE_CACHE_SHIFT; + pgoff_t index = off >> PAGE_SHIFT; for (i = 0; i < num_pages; i++) { again: @@ -2934,12 +2932,12 @@ static void btrfs_cmp_data_free(struct cmp_pages *cmp) pg = cmp->src_pages[i]; if (pg) { unlock_page(pg); - page_cache_release(pg); + put_page(pg); } pg = cmp->dst_pages[i]; if (pg) { unlock_page(pg); - page_cache_release(pg); + put_page(pg); } } kfree(cmp->src_pages); @@ -2951,7 +2949,7 @@ static int btrfs_cmp_data_prepare(struct inode *src, u64 loff, u64 len, struct cmp_pages *cmp) { int ret; - int num_pages = PAGE_CACHE_ALIGN(len) >> PAGE_CACHE_SHIFT; + int num_pages = PAGE_ALIGN(len) >> PAGE_SHIFT; struct page **src_pgarr, **dst_pgarr; /* @@ -2960,8 +2958,8 @@ static int btrfs_cmp_data_prepare(struct inode *src, u64 loff, * of the array is bounded by len, which is in turn bounded by * BTRFS_MAX_DEDUPE_LEN. */ - src_pgarr = kzalloc(num_pages * sizeof(struct page *), GFP_NOFS); - dst_pgarr = kzalloc(num_pages * sizeof(struct page *), GFP_NOFS); + src_pgarr = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL); + dst_pgarr = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL); if (!src_pgarr || !dst_pgarr) { kfree(src_pgarr); kfree(dst_pgarr); @@ -2989,12 +2987,12 @@ static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst, int ret = 0; int i; struct page *src_page, *dst_page; - unsigned int cmp_len = PAGE_CACHE_SIZE; + unsigned int cmp_len = PAGE_SIZE; void *addr, *dst_addr; i = 0; while (len) { - if (len < PAGE_CACHE_SIZE) + if (len < PAGE_SIZE) cmp_len = len; BUG_ON(i >= cmp->num_pages); @@ -3068,6 +3066,9 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, ret = extent_same_check_offsets(src, loff, &len, olen); if (ret) goto out_unlock; + ret = extent_same_check_offsets(src, dst_loff, &len, olen); + if (ret) + goto out_unlock; /* * Single inode case wants the same checks, except we @@ -3190,7 +3191,7 @@ ssize_t 
btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen, if (olen > BTRFS_MAX_DEDUPE_LEN) olen = BTRFS_MAX_DEDUPE_LEN; - if (WARN_ON_ONCE(bs < PAGE_CACHE_SIZE)) { + if (WARN_ON_ONCE(bs < PAGE_SIZE)) { /* * Btrfs does not support blocksize < page_size. As a * result, btrfs_cmp_data() won't correctly handle @@ -3217,7 +3218,7 @@ static int clone_finish_inode_update(struct btrfs_trans_handle *trans, inode_inc_iversion(inode); if (!no_time_update) - inode->i_mtime = inode->i_ctime = CURRENT_TIME; + inode->i_mtime = inode->i_ctime = current_fs_time(inode->i_sb); /* * We round up to the block size at eof when determining which * extents to clone above, but shouldn't round up the file size. @@ -3889,8 +3890,9 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src, * Truncate page cache pages so that future reads will see the cloned * data immediately and not the previous data. */ - truncate_inode_pages_range(&inode->i_data, destoff, - PAGE_CACHE_ALIGN(destoff + len) - 1); + truncate_inode_pages_range(&inode->i_data, + round_down(destoff, PAGE_SIZE), + round_up(destoff + len, PAGE_SIZE) - 1); out_unlock: if (!same_inode) btrfs_double_inode_unlock(src, inode); @@ -4122,7 +4124,7 @@ static long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) /* we generally have at most 6 or so space infos, one for each raid * level. 
So, a whole page should be more than enough for everyone */ - if (alloc_size > PAGE_CACHE_SIZE) + if (alloc_size > PAGE_SIZE) return -ENOMEM; space_args.total_spaces = 0; @@ -5031,7 +5033,7 @@ static long _btrfs_ioctl_set_received_subvol(struct file *file, struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_root_item *root_item = &root->root_item; struct btrfs_trans_handle *trans; - struct timespec ct = CURRENT_TIME; + struct timespec ct = current_fs_time(inode->i_sb); int ret = 0; int received_uuid_changed; @@ -5262,8 +5264,7 @@ out_unlock: .compat_ro_flags = BTRFS_FEATURE_COMPAT_RO_##suffix, \ .incompat_flags = BTRFS_FEATURE_INCOMPAT_##suffix } -static int btrfs_ioctl_get_supported_features(struct file *file, - void __user *arg) +int btrfs_ioctl_get_supported_features(void __user *arg) { static const struct btrfs_ioctl_feature_flags features[3] = { INIT_FEATURE_FLAGS(SUPP), @@ -5542,7 +5543,7 @@ long btrfs_ioctl(struct file *file, unsigned int case BTRFS_IOC_SET_FSLABEL: return btrfs_ioctl_set_fslabel(file, argp); case BTRFS_IOC_GET_SUPPORTED_FEATURES: - return btrfs_ioctl_get_supported_features(file, argp); + return btrfs_ioctl_get_supported_features(argp); case BTRFS_IOC_GET_FEATURES: return btrfs_ioctl_get_features(file, argp); case BTRFS_IOC_SET_FEATURES: @@ -5551,3 +5552,24 @@ long btrfs_ioctl(struct file *file, unsigned int return -ENOTTY; } + +#ifdef CONFIG_COMPAT +long btrfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + switch (cmd) { + case FS_IOC32_GETFLAGS: + cmd = FS_IOC_GETFLAGS; + break; + case FS_IOC32_SETFLAGS: + cmd = FS_IOC_SETFLAGS; + break; + case FS_IOC32_GETVERSION: + cmd = FS_IOC_GETVERSION; + break; + default: + return -ENOIOCTLCMD; + } + + return btrfs_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); +} +#endif diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c index a2f051347..1adfbe7be 100644 --- a/fs/btrfs/lzo.c +++ b/fs/btrfs/lzo.c @@ -55,8 +55,8 @@ static struct list_head *lzo_alloc_workspace(void) 
return ERR_PTR(-ENOMEM); workspace->mem = vmalloc(LZO1X_MEM_COMPRESS); - workspace->buf = vmalloc(lzo1x_worst_compress(PAGE_CACHE_SIZE)); - workspace->cbuf = vmalloc(lzo1x_worst_compress(PAGE_CACHE_SIZE)); + workspace->buf = vmalloc(lzo1x_worst_compress(PAGE_SIZE)); + workspace->cbuf = vmalloc(lzo1x_worst_compress(PAGE_SIZE)); if (!workspace->mem || !workspace->buf || !workspace->cbuf) goto fail; @@ -116,7 +116,7 @@ static int lzo_compress_pages(struct list_head *ws, *total_out = 0; *total_in = 0; - in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT); + in_page = find_get_page(mapping, start >> PAGE_SHIFT); data_in = kmap(in_page); /* @@ -133,10 +133,10 @@ static int lzo_compress_pages(struct list_head *ws, tot_out = LZO_LEN; pages[0] = out_page; nr_pages = 1; - pg_bytes_left = PAGE_CACHE_SIZE - LZO_LEN; + pg_bytes_left = PAGE_SIZE - LZO_LEN; /* compress at most one page of data each time */ - in_len = min(len, PAGE_CACHE_SIZE); + in_len = min(len, PAGE_SIZE); while (tot_in < len) { ret = lzo1x_1_compress(data_in, in_len, workspace->cbuf, &out_len, workspace->mem); @@ -201,7 +201,7 @@ static int lzo_compress_pages(struct list_head *ws, cpage_out = kmap(out_page); pages[nr_pages++] = out_page; - pg_bytes_left = PAGE_CACHE_SIZE; + pg_bytes_left = PAGE_SIZE; out_offset = 0; } } @@ -221,12 +221,12 @@ static int lzo_compress_pages(struct list_head *ws, bytes_left = len - tot_in; kunmap(in_page); - page_cache_release(in_page); + put_page(in_page); - start += PAGE_CACHE_SIZE; - in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT); + start += PAGE_SIZE; + in_page = find_get_page(mapping, start >> PAGE_SHIFT); data_in = kmap(in_page); - in_len = min(bytes_left, PAGE_CACHE_SIZE); + in_len = min(bytes_left, PAGE_SIZE); } if (tot_out > tot_in) @@ -248,7 +248,7 @@ out: if (in_page) { kunmap(in_page); - page_cache_release(in_page); + put_page(in_page); } return ret; @@ -266,7 +266,7 @@ static int lzo_decompress_biovec(struct list_head *ws, char *data_in; unsigned 
long page_in_index = 0; unsigned long page_out_index = 0; - unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_CACHE_SIZE); + unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_SIZE); unsigned long buf_start; unsigned long buf_offset = 0; unsigned long bytes; @@ -289,7 +289,7 @@ static int lzo_decompress_biovec(struct list_head *ws, tot_in = LZO_LEN; in_offset = LZO_LEN; tot_len = min_t(size_t, srclen, tot_len); - in_page_bytes_left = PAGE_CACHE_SIZE - LZO_LEN; + in_page_bytes_left = PAGE_SIZE - LZO_LEN; tot_out = 0; pg_offset = 0; @@ -345,12 +345,12 @@ cont: data_in = kmap(pages_in[++page_in_index]); - in_page_bytes_left = PAGE_CACHE_SIZE; + in_page_bytes_left = PAGE_SIZE; in_offset = 0; } } - out_len = lzo1x_worst_compress(PAGE_CACHE_SIZE); + out_len = lzo1x_worst_compress(PAGE_SIZE); ret = lzo1x_decompress_safe(buf, in_len, workspace->buf, &out_len); if (need_unmap) @@ -399,7 +399,7 @@ static int lzo_decompress(struct list_head *ws, unsigned char *data_in, in_len = read_compress_length(data_in); data_in += LZO_LEN; - out_len = PAGE_CACHE_SIZE; + out_len = PAGE_SIZE; ret = lzo1x_decompress_safe(data_in, in_len, workspace->buf, &out_len); if (ret != LZO_E_OK) { printk(KERN_WARNING "BTRFS: decompress failed!\n"); diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 8c27292ea..0de7da5a6 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -25,6 +25,7 @@ #include "btrfs_inode.h" #include "extent_io.h" #include "disk-io.h" +#include "compression.h" static struct kmem_cache *btrfs_ordered_extent_cache; @@ -1009,7 +1010,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, for (; node; node = rb_prev(node)) { test = rb_entry(node, struct btrfs_ordered_extent, rb_node); - /* We treat this entry as if it doesnt exist */ + /* We treat this entry as if it doesn't exist */ if (test_bit(BTRFS_ORDERED_UPDATED_ISIZE, &test->flags)) continue; if (test->file_offset + test->len <= disk_i_size) @@ -1114,6 +1115,5 @@ int 
__init ordered_data_init(void) void ordered_data_exit(void) { - if (btrfs_ordered_extent_cache) - kmem_cache_destroy(btrfs_ordered_extent_cache); + kmem_cache_destroy(btrfs_ordered_extent_cache); } diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 647ab12fd..147dc6ca5 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -295,8 +295,27 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) btrfs_dev_extent_chunk_offset(l, dev_extent), btrfs_dev_extent_length(l, dev_extent)); break; - case BTRFS_DEV_STATS_KEY: - printk(KERN_INFO "\t\tdevice stats\n"); + case BTRFS_PERSISTENT_ITEM_KEY: + printk(KERN_INFO "\t\tpersistent item objectid %llu offset %llu\n", + key.objectid, key.offset); + switch (key.objectid) { + case BTRFS_DEV_STATS_OBJECTID: + printk(KERN_INFO "\t\tdevice stats\n"); + break; + default: + printk(KERN_INFO "\t\tunknown persistent item\n"); + } + break; + case BTRFS_TEMPORARY_ITEM_KEY: + printk(KERN_INFO "\t\ttemporary item objectid %llu offset %llu\n", + key.objectid, key.offset); + switch (key.objectid) { + case BTRFS_BALANCE_OBJECTID: + printk(KERN_INFO "\t\tbalance status\n"); + break; + default: + printk(KERN_INFO "\t\tunknown temporary item\n"); + } break; case BTRFS_DEV_REPLACE_KEY: printk(KERN_INFO "\t\tdev replace\n"); diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c index f9e60231f..36992128c 100644 --- a/fs/btrfs/props.c +++ b/fs/btrfs/props.c @@ -22,6 +22,7 @@ #include "hash.h" #include "transaction.h" #include "xattr.h" +#include "compression.h" #define BTRFS_PROP_HANDLERS_HT_BITS 8 static DEFINE_HASHTABLE(prop_handlers_ht, BTRFS_PROP_HANDLERS_HT_BITS); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 5279fdae7..9e119552e 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1463,6 +1463,7 @@ struct btrfs_qgroup_extent_record u64 bytenr = record->bytenr; assert_spin_locked(&delayed_refs->lock); + trace_btrfs_qgroup_insert_dirty_extent(record); while (*p) { parent_node = *p; @@ 
-1594,6 +1595,9 @@ static int qgroup_update_counters(struct btrfs_fs_info *fs_info, cur_old_count = btrfs_qgroup_get_old_refcnt(qg, seq); cur_new_count = btrfs_qgroup_get_new_refcnt(qg, seq); + trace_qgroup_update_counters(qg->qgroupid, cur_old_count, + cur_new_count); + /* Rfer update part */ if (cur_old_count == 0 && cur_new_count > 0) { qg->rfer += num_bytes; @@ -1683,6 +1687,9 @@ btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, goto out_free; BUG_ON(!fs_info->quota_root); + trace_btrfs_qgroup_account_extent(bytenr, num_bytes, nr_old_roots, + nr_new_roots); + qgroups = ulist_alloc(GFP_NOFS); if (!qgroups) { ret = -ENOMEM; @@ -1752,6 +1759,8 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans, record = rb_entry(node, struct btrfs_qgroup_extent_record, node); + trace_btrfs_qgroup_account_extents(record); + if (!ret) { /* * Use (u64)-1 as time_seq to do special search, which @@ -1842,8 +1851,10 @@ out: } /* - * copy the acounting information between qgroups. This is necessary when a - * snapshot or a subvolume is created + * Copy the acounting information between qgroups. This is necessary + * when a snapshot or a subvolume is created. Throwing an error will + * cause a transaction abort so we take extra care here to only error + * when a readonly fs is a reasonable outcome. */ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid, @@ -1873,15 +1884,15 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, 2 * inherit->num_excl_copies; for (i = 0; i < nums; ++i) { srcgroup = find_qgroup_rb(fs_info, *i_qgroups); - if (!srcgroup) { - ret = -EINVAL; - goto out; - } - if ((srcgroup->qgroupid >> 48) <= (objectid >> 48)) { - ret = -EINVAL; - goto out; - } + /* + * Zero out invalid groups so we can ignore + * them later. 
+ */ + if (!srcgroup || + ((srcgroup->qgroupid >> 48) <= (objectid >> 48))) + *i_qgroups = 0ULL; + ++i_qgroups; } } @@ -1916,17 +1927,19 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, */ if (inherit) { i_qgroups = (u64 *)(inherit + 1); - for (i = 0; i < inherit->num_qgroups; ++i) { + for (i = 0; i < inherit->num_qgroups; ++i, ++i_qgroups) { + if (*i_qgroups == 0) + continue; ret = add_qgroup_relation_item(trans, quota_root, objectid, *i_qgroups); - if (ret) + if (ret && ret != -EEXIST) goto out; ret = add_qgroup_relation_item(trans, quota_root, *i_qgroups, objectid); - if (ret) + if (ret && ret != -EEXIST) goto out; - ++i_qgroups; } + ret = 0; } @@ -1987,17 +2000,22 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, i_qgroups = (u64 *)(inherit + 1); for (i = 0; i < inherit->num_qgroups; ++i) { - ret = add_relation_rb(quota_root->fs_info, objectid, - *i_qgroups); - if (ret) - goto unlock; + if (*i_qgroups) { + ret = add_relation_rb(quota_root->fs_info, objectid, + *i_qgroups); + if (ret) + goto unlock; + } ++i_qgroups; } - for (i = 0; i < inherit->num_ref_copies; ++i) { + for (i = 0; i < inherit->num_ref_copies; ++i, i_qgroups += 2) { struct btrfs_qgroup *src; struct btrfs_qgroup *dst; + if (!i_qgroups[0] || !i_qgroups[1]) + continue; + src = find_qgroup_rb(fs_info, i_qgroups[0]); dst = find_qgroup_rb(fs_info, i_qgroups[1]); @@ -2008,12 +2026,14 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, dst->rfer = src->rfer - level_size; dst->rfer_cmpr = src->rfer_cmpr - level_size; - i_qgroups += 2; } - for (i = 0; i < inherit->num_excl_copies; ++i) { + for (i = 0; i < inherit->num_excl_copies; ++i, i_qgroups += 2) { struct btrfs_qgroup *src; struct btrfs_qgroup *dst; + if (!i_qgroups[0] || !i_qgroups[1]) + continue; + src = find_qgroup_rb(fs_info, i_qgroups[0]); dst = find_qgroup_rb(fs_info, i_qgroups[1]); @@ -2024,7 +2044,6 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, dst->excl = src->excl + level_size; 
dst->excl_cmpr = src->excl_cmpr + level_size; - i_qgroups += 2; } unlock: diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 55161369f..0b7792e02 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -270,7 +270,7 @@ static void cache_rbio_pages(struct btrfs_raid_bio *rbio) s = kmap(rbio->bio_pages[i]); d = kmap(rbio->stripe_pages[i]); - memcpy(d, s, PAGE_CACHE_SIZE); + memcpy(d, s, PAGE_SIZE); kunmap(rbio->bio_pages[i]); kunmap(rbio->stripe_pages[i]); @@ -962,7 +962,7 @@ static struct page *page_in_rbio(struct btrfs_raid_bio *rbio, */ static unsigned long rbio_nr_pages(unsigned long stripe_len, int nr_stripes) { - return DIV_ROUND_UP(stripe_len, PAGE_CACHE_SIZE) * nr_stripes; + return DIV_ROUND_UP(stripe_len, PAGE_SIZE) * nr_stripes; } /* @@ -1078,7 +1078,7 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio, u64 disk_start; stripe = &rbio->bbio->stripes[stripe_nr]; - disk_start = stripe->physical + (page_index << PAGE_CACHE_SHIFT); + disk_start = stripe->physical + (page_index << PAGE_SHIFT); /* if the device is missing, just fail this stripe */ if (!stripe->dev->bdev) @@ -1096,8 +1096,8 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio, if (last_end == disk_start && stripe->dev->bdev && !last->bi_error && last->bi_bdev == stripe->dev->bdev) { - ret = bio_add_page(last, page, PAGE_CACHE_SIZE, 0); - if (ret == PAGE_CACHE_SIZE) + ret = bio_add_page(last, page, PAGE_SIZE, 0); + if (ret == PAGE_SIZE) return 0; } } @@ -1111,7 +1111,7 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio, bio->bi_bdev = stripe->dev->bdev; bio->bi_iter.bi_sector = disk_start >> 9; - bio_add_page(bio, page, PAGE_CACHE_SIZE, 0); + bio_add_page(bio, page, PAGE_SIZE, 0); bio_list_add(bio_list, bio); return 0; } @@ -1154,7 +1154,7 @@ static void index_rbio_pages(struct btrfs_raid_bio *rbio) bio_list_for_each(bio, &rbio->bio_list) { start = (u64)bio->bi_iter.bi_sector << 9; stripe_offset = start - rbio->bbio->raid_map[0]; - page_index = stripe_offset >> 
PAGE_CACHE_SHIFT; + page_index = stripe_offset >> PAGE_SHIFT; for (i = 0; i < bio->bi_vcnt; i++) { p = bio->bi_io_vec[i].bv_page; @@ -1253,7 +1253,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio) } else { /* raid5 */ memcpy(pointers[nr_data], pointers[0], PAGE_SIZE); - run_xor(pointers + 1, nr_data - 1, PAGE_CACHE_SIZE); + run_xor(pointers + 1, nr_data - 1, PAGE_SIZE); } @@ -1914,7 +1914,7 @@ pstripe: /* Copy parity block into failed block to start with */ memcpy(pointers[faila], pointers[rbio->nr_data], - PAGE_CACHE_SIZE); + PAGE_SIZE); /* rearrange the pointer array */ p = pointers[faila]; @@ -1923,7 +1923,7 @@ pstripe: pointers[rbio->nr_data - 1] = p; /* xor in the rest */ - run_xor(pointers, rbio->nr_data - 1, PAGE_CACHE_SIZE); + run_xor(pointers, rbio->nr_data - 1, PAGE_SIZE); } /* if we're doing this rebuild as part of an rmw, go through * and set all of our private rbio pages in the @@ -2250,7 +2250,7 @@ void raid56_add_scrub_pages(struct btrfs_raid_bio *rbio, struct page *page, ASSERT(logical + PAGE_SIZE <= rbio->bbio->raid_map[0] + rbio->stripe_len * rbio->nr_data); stripe_offset = (int)(logical - rbio->bbio->raid_map[0]); - index = stripe_offset >> PAGE_CACHE_SHIFT; + index = stripe_offset >> PAGE_SHIFT; rbio->bio_pages[index] = page; } @@ -2365,14 +2365,14 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, } else { /* raid5 */ memcpy(pointers[nr_data], pointers[0], PAGE_SIZE); - run_xor(pointers + 1, nr_data - 1, PAGE_CACHE_SIZE); + run_xor(pointers + 1, nr_data - 1, PAGE_SIZE); } /* Check scrubbing pairty and repair it */ p = rbio_stripe_page(rbio, rbio->scrubp, pagenr); parity = kmap(p); - if (memcmp(parity, pointers[rbio->scrubp], PAGE_CACHE_SIZE)) - memcpy(parity, pointers[rbio->scrubp], PAGE_CACHE_SIZE); + if (memcmp(parity, pointers[rbio->scrubp], PAGE_SIZE)) + memcpy(parity, pointers[rbio->scrubp], PAGE_SIZE); else /* Parity is right, needn't writeback */ bitmap_clear(rbio->dbitmap, pagenr, 1); diff --git 
a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 619f92963..298631eae 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c @@ -72,7 +72,7 @@ struct reada_extent { spinlock_t lock; struct reada_zone *zones[BTRFS_MAX_MIRRORS]; int nzones; - struct btrfs_device *scheduled_for; + int scheduled; }; struct reada_zone { @@ -101,67 +101,53 @@ static void reada_start_machine(struct btrfs_fs_info *fs_info); static void __reada_start_machine(struct btrfs_fs_info *fs_info); static int reada_add_block(struct reada_control *rc, u64 logical, - struct btrfs_key *top, int level, u64 generation); + struct btrfs_key *top, u64 generation); /* recurses */ /* in case of err, eb might be NULL */ -static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, - u64 start, int err) +static void __readahead_hook(struct btrfs_fs_info *fs_info, + struct reada_extent *re, struct extent_buffer *eb, + u64 start, int err) { int level = 0; int nritems; int i; u64 bytenr; u64 generation; - struct reada_extent *re; - struct btrfs_fs_info *fs_info = root->fs_info; struct list_head list; - unsigned long index = start >> PAGE_CACHE_SHIFT; - struct btrfs_device *for_dev; if (eb) level = btrfs_header_level(eb); - /* find extent */ - spin_lock(&fs_info->reada_lock); - re = radix_tree_lookup(&fs_info->reada_tree, index); - if (re) - re->refcnt++; - spin_unlock(&fs_info->reada_lock); - - if (!re) - return -1; - spin_lock(&re->lock); /* * just take the full list from the extent. afterwards we * don't need the lock anymore */ list_replace_init(&re->extctl, &list); - for_dev = re->scheduled_for; - re->scheduled_for = NULL; + re->scheduled = 0; spin_unlock(&re->lock); - if (err == 0) { - nritems = level ? btrfs_header_nritems(eb) : 0; - generation = btrfs_header_generation(eb); - /* - * FIXME: currently we just set nritems to 0 if this is a leaf, - * effectively ignoring the content. In a next step we could - * trigger more readahead depending from the content, e.g. 
- * fetch the checksums for the extents in the leaf. - */ - } else { - /* - * this is the error case, the extent buffer has not been - * read correctly. We won't access anything from it and - * just cleanup our data structures. Effectively this will - * cut the branch below this node from read ahead. - */ - nritems = 0; - generation = 0; - } + /* + * this is the error case, the extent buffer has not been + * read correctly. We won't access anything from it and + * just cleanup our data structures. Effectively this will + * cut the branch below this node from read ahead. + */ + if (err) + goto cleanup; + /* + * FIXME: currently we just set nritems to 0 if this is a leaf, + * effectively ignoring the content. In a next step we could + * trigger more readahead depending from the content, e.g. + * fetch the checksums for the extents in the leaf. + */ + if (!level) + goto cleanup; + + nritems = btrfs_header_nritems(eb); + generation = btrfs_header_generation(eb); for (i = 0; i < nritems; i++) { struct reada_extctl *rec; u64 n_gen; @@ -188,19 +174,20 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, */ #ifdef DEBUG if (rec->generation != generation) { - btrfs_debug(root->fs_info, - "generation mismatch for (%llu,%d,%llu) %llu != %llu", - key.objectid, key.type, key.offset, - rec->generation, generation); + btrfs_debug(fs_info, + "generation mismatch for (%llu,%d,%llu) %llu != %llu", + key.objectid, key.type, key.offset, + rec->generation, generation); } #endif if (rec->generation == generation && btrfs_comp_cpu_keys(&key, &rc->key_end) < 0 && btrfs_comp_cpu_keys(&next_key, &rc->key_start) > 0) - reada_add_block(rc, bytenr, &next_key, - level - 1, n_gen); + reada_add_block(rc, bytenr, &next_key, n_gen); } } + +cleanup: /* * free extctl records */ @@ -222,26 +209,37 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, reada_extent_put(fs_info, re); /* one ref for each entry */ } - reada_extent_put(fs_info, re); 
/* our ref */ - if (for_dev) - atomic_dec(&for_dev->reada_in_flight); - return 0; + return; } /* * start is passed separately in case eb in NULL, which may be the case with * failed I/O */ -int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, - u64 start, int err) +int btree_readahead_hook(struct btrfs_fs_info *fs_info, + struct extent_buffer *eb, u64 start, int err) { - int ret; + int ret = 0; + struct reada_extent *re; - ret = __readahead_hook(root, eb, start, err); + /* find extent */ + spin_lock(&fs_info->reada_lock); + re = radix_tree_lookup(&fs_info->reada_tree, + start >> PAGE_SHIFT); + if (re) + re->refcnt++; + spin_unlock(&fs_info->reada_lock); + if (!re) { + ret = -1; + goto start_machine; + } - reada_start_machine(root->fs_info); + __readahead_hook(fs_info, re, eb, start, err); + reada_extent_put(fs_info, re); /* our ref */ +start_machine: + reada_start_machine(fs_info); return ret; } @@ -259,19 +257,15 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info, zone = NULL; spin_lock(&fs_info->reada_lock); ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone, - logical >> PAGE_CACHE_SHIFT, 1); - if (ret == 1) + logical >> PAGE_SHIFT, 1); + if (ret == 1 && logical >= zone->start && logical <= zone->end) { kref_get(&zone->refcnt); - spin_unlock(&fs_info->reada_lock); - - if (ret == 1) { - if (logical >= zone->start && logical < zone->end) - return zone; - spin_lock(&fs_info->reada_lock); - kref_put(&zone->refcnt, reada_zone_release); spin_unlock(&fs_info->reada_lock); + return zone; } + spin_unlock(&fs_info->reada_lock); + cache = btrfs_lookup_block_group(fs_info, logical); if (!cache) return NULL; @@ -280,7 +274,7 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info, end = start + cache->key.offset - 1; btrfs_put_block_group(cache); - zone = kzalloc(sizeof(*zone), GFP_NOFS); + zone = kzalloc(sizeof(*zone), GFP_KERNEL); if (!zone) return NULL; @@ -300,15 +294,17 @@ static struct 
reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info, spin_lock(&fs_info->reada_lock); ret = radix_tree_insert(&dev->reada_zones, - (unsigned long)(zone->end >> PAGE_CACHE_SHIFT), + (unsigned long)(zone->end >> PAGE_SHIFT), zone); if (ret == -EEXIST) { kfree(zone); ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone, - logical >> PAGE_CACHE_SHIFT, 1); - if (ret == 1) + logical >> PAGE_SHIFT, 1); + if (ret == 1 && logical >= zone->start && logical <= zone->end) kref_get(&zone->refcnt); + else + zone = NULL; } spin_unlock(&fs_info->reada_lock); @@ -317,7 +313,7 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info, static struct reada_extent *reada_find_extent(struct btrfs_root *root, u64 logical, - struct btrfs_key *top, int level) + struct btrfs_key *top) { int ret; struct reada_extent *re = NULL; @@ -330,9 +326,9 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, u64 length; int real_stripes; int nzones = 0; - int i; - unsigned long index = logical >> PAGE_CACHE_SHIFT; + unsigned long index = logical >> PAGE_SHIFT; int dev_replace_is_ongoing; + int have_zone = 0; spin_lock(&fs_info->reada_lock); re = radix_tree_lookup(&fs_info->reada_tree, index); @@ -343,7 +339,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, if (re) return re; - re = kzalloc(sizeof(*re), GFP_NOFS); + re = kzalloc(sizeof(*re), GFP_KERNEL); if (!re) return NULL; @@ -375,11 +371,16 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, struct reada_zone *zone; dev = bbio->stripes[nzones].dev; + + /* cannot read ahead on missing device. 
*/ + if (!dev->bdev) + continue; + zone = reada_find_zone(fs_info, dev, logical, bbio); if (!zone) - break; + continue; - re->zones[nzones] = zone; + re->zones[re->nzones++] = zone; spin_lock(&zone->lock); if (!zone->elems) kref_get(&zone->refcnt); @@ -389,14 +390,13 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, kref_put(&zone->refcnt, reada_zone_release); spin_unlock(&fs_info->reada_lock); } - re->nzones = nzones; - if (nzones == 0) { + if (re->nzones == 0) { /* not a single zone found, error and out */ goto error; } /* insert extent in reada_tree + all per-device trees, all or nothing */ - btrfs_dev_replace_lock(&fs_info->dev_replace); + btrfs_dev_replace_lock(&fs_info->dev_replace, 0); spin_lock(&fs_info->reada_lock); ret = radix_tree_insert(&fs_info->reada_tree, index, re); if (ret == -EEXIST) { @@ -404,19 +404,20 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, BUG_ON(!re_exist); re_exist->refcnt++; spin_unlock(&fs_info->reada_lock); - btrfs_dev_replace_unlock(&fs_info->dev_replace); + btrfs_dev_replace_unlock(&fs_info->dev_replace, 0); goto error; } if (ret) { spin_unlock(&fs_info->reada_lock); - btrfs_dev_replace_unlock(&fs_info->dev_replace); + btrfs_dev_replace_unlock(&fs_info->dev_replace, 0); goto error; } prev_dev = NULL; dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing( &fs_info->dev_replace); - for (i = 0; i < nzones; ++i) { - dev = bbio->stripes[i].dev; + for (nzones = 0; nzones < re->nzones; ++nzones) { + dev = re->zones[nzones]->device; + if (dev == prev_dev) { /* * in case of DUP, just add the first zone. As both @@ -427,15 +428,9 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, */ continue; } - if (!dev->bdev) { - /* - * cannot read ahead on missing device, but for RAID5/6, - * REQ_GET_READ_MIRRORS return 1. So don't skip missing - * device for such case. 
- */ - if (nzones > 1) - continue; - } + if (!dev->bdev) + continue; + if (dev_replace_is_ongoing && dev == fs_info->dev_replace.tgtdev) { /* @@ -447,8 +442,8 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, prev_dev = dev; ret = radix_tree_insert(&dev->reada_extents, index, re); if (ret) { - while (--i >= 0) { - dev = bbio->stripes[i].dev; + while (--nzones >= 0) { + dev = re->zones[nzones]->device; BUG_ON(dev == NULL); /* ignore whether the entry was inserted */ radix_tree_delete(&dev->reada_extents, index); @@ -456,21 +451,24 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, BUG_ON(fs_info == NULL); radix_tree_delete(&fs_info->reada_tree, index); spin_unlock(&fs_info->reada_lock); - btrfs_dev_replace_unlock(&fs_info->dev_replace); + btrfs_dev_replace_unlock(&fs_info->dev_replace, 0); goto error; } + have_zone = 1; } spin_unlock(&fs_info->reada_lock); - btrfs_dev_replace_unlock(&fs_info->dev_replace); + btrfs_dev_replace_unlock(&fs_info->dev_replace, 0); + + if (!have_zone) + goto error; btrfs_put_bbio(bbio); return re; error: - while (nzones) { + for (nzones = 0; nzones < re->nzones; ++nzones) { struct reada_zone *zone; - --nzones; zone = re->zones[nzones]; kref_get(&zone->refcnt); spin_lock(&zone->lock); @@ -497,7 +495,7 @@ static void reada_extent_put(struct btrfs_fs_info *fs_info, struct reada_extent *re) { int i; - unsigned long index = re->logical >> PAGE_CACHE_SHIFT; + unsigned long index = re->logical >> PAGE_SHIFT; spin_lock(&fs_info->reada_lock); if (--re->refcnt) { @@ -531,8 +529,6 @@ static void reada_extent_put(struct btrfs_fs_info *fs_info, kref_put(&zone->refcnt, reada_zone_release); spin_unlock(&fs_info->reada_lock); } - if (re->scheduled_for) - atomic_dec(&re->scheduled_for->reada_in_flight); kfree(re); } @@ -542,7 +538,7 @@ static void reada_zone_release(struct kref *kref) struct reada_zone *zone = container_of(kref, struct reada_zone, refcnt); radix_tree_delete(&zone->device->reada_zones, - 
zone->end >> PAGE_CACHE_SHIFT); + zone->end >> PAGE_SHIFT); kfree(zone); } @@ -556,17 +552,17 @@ static void reada_control_release(struct kref *kref) } static int reada_add_block(struct reada_control *rc, u64 logical, - struct btrfs_key *top, int level, u64 generation) + struct btrfs_key *top, u64 generation) { struct btrfs_root *root = rc->root; struct reada_extent *re; struct reada_extctl *rec; - re = reada_find_extent(root, logical, top, level); /* takes one ref */ + re = reada_find_extent(root, logical, top); /* takes one ref */ if (!re) return -1; - rec = kzalloc(sizeof(*rec), GFP_NOFS); + rec = kzalloc(sizeof(*rec), GFP_KERNEL); if (!rec) { reada_extent_put(root->fs_info, re); return -ENOMEM; @@ -591,7 +587,7 @@ static int reada_add_block(struct reada_control *rc, u64 logical, static void reada_peer_zones_set_lock(struct reada_zone *zone, int lock) { int i; - unsigned long index = zone->end >> PAGE_CACHE_SHIFT; + unsigned long index = zone->end >> PAGE_SHIFT; for (i = 0; i < zone->ndevs; ++i) { struct reada_zone *peer; @@ -626,7 +622,7 @@ static int reada_pick_zone(struct btrfs_device *dev) (void **)&zone, index, 1); if (ret == 0) break; - index = (zone->end >> PAGE_CACHE_SHIFT) + 1; + index = (zone->end >> PAGE_SHIFT) + 1; if (zone->locked) { if (zone->elems > top_locked_elems) { top_locked_elems = zone->elems; @@ -662,7 +658,6 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info, u64 logical; int ret; int i; - int need_kick = 0; spin_lock(&fs_info->reada_lock); if (dev->reada_curr_zone == NULL) { @@ -678,8 +673,8 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info, * plugging to speed things up */ ret = radix_tree_gang_lookup(&dev->reada_extents, (void **)&re, - dev->reada_next >> PAGE_CACHE_SHIFT, 1); - if (ret == 0 || re->logical >= dev->reada_curr_zone->end) { + dev->reada_next >> PAGE_SHIFT, 1); + if (ret == 0 || re->logical > dev->reada_curr_zone->end) { ret = reada_pick_zone(dev); if (!ret) { 
spin_unlock(&fs_info->reada_lock); @@ -687,7 +682,7 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info, } re = NULL; ret = radix_tree_gang_lookup(&dev->reada_extents, (void **)&re, - dev->reada_next >> PAGE_CACHE_SHIFT, 1); + dev->reada_next >> PAGE_SHIFT, 1); } if (ret == 0) { spin_unlock(&fs_info->reada_lock); @@ -698,6 +693,15 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info, spin_unlock(&fs_info->reada_lock); + spin_lock(&re->lock); + if (re->scheduled || list_empty(&re->extctl)) { + spin_unlock(&re->lock); + reada_extent_put(fs_info, re); + return 0; + } + re->scheduled = 1; + spin_unlock(&re->lock); + /* * find mirror num */ @@ -709,29 +713,20 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info, } logical = re->logical; - spin_lock(&re->lock); - if (re->scheduled_for == NULL) { - re->scheduled_for = dev; - need_kick = 1; - } - spin_unlock(&re->lock); - - reada_extent_put(fs_info, re); - - if (!need_kick) - return 0; - atomic_inc(&dev->reada_in_flight); ret = reada_tree_block_flagged(fs_info->extent_root, logical, mirror_num, &eb); if (ret) - __readahead_hook(fs_info->extent_root, NULL, logical, ret); + __readahead_hook(fs_info, re, NULL, logical, ret); else if (eb) - __readahead_hook(fs_info->extent_root, eb, eb->start, ret); + __readahead_hook(fs_info, re, eb, eb->start, ret); if (eb) free_extent_buffer(eb); + atomic_dec(&dev->reada_in_flight); + reada_extent_put(fs_info, re); + return 1; } @@ -752,6 +747,8 @@ static void reada_start_machine_worker(struct btrfs_work *work) set_task_ioprio(current, BTRFS_IOPRIO_READA); __reada_start_machine(fs_info); set_task_ioprio(current, old_ioprio); + + atomic_dec(&fs_info->reada_works_cnt); } static void __reada_start_machine(struct btrfs_fs_info *fs_info) @@ -783,15 +780,19 @@ static void __reada_start_machine(struct btrfs_fs_info *fs_info) * enqueue to workers to finish it. This will distribute the load to * the cores. 
*/ - for (i = 0; i < 2; ++i) + for (i = 0; i < 2; ++i) { reada_start_machine(fs_info); + if (atomic_read(&fs_info->reada_works_cnt) > + BTRFS_MAX_MIRRORS * 2) + break; + } } static void reada_start_machine(struct btrfs_fs_info *fs_info) { struct reada_machine_work *rmw; - rmw = kzalloc(sizeof(*rmw), GFP_NOFS); + rmw = kzalloc(sizeof(*rmw), GFP_KERNEL); if (!rmw) { /* FIXME we cannot handle this properly right now */ BUG(); @@ -801,6 +802,7 @@ static void reada_start_machine(struct btrfs_fs_info *fs_info) rmw->fs_info = fs_info; btrfs_queue_work(fs_info->readahead_workers, &rmw->work); + atomic_inc(&fs_info->reada_works_cnt); } #ifdef DEBUG @@ -836,7 +838,7 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all) printk(KERN_CONT " curr off %llu", device->reada_next - zone->start); printk(KERN_CONT "\n"); - index = (zone->end >> PAGE_CACHE_SHIFT) + 1; + index = (zone->end >> PAGE_SHIFT) + 1; } cnt = 0; index = 0; @@ -848,10 +850,9 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all) if (ret == 0) break; printk(KERN_DEBUG - " re: logical %llu size %u empty %d for %lld", + " re: logical %llu size %u empty %d scheduled %d", re->logical, fs_info->tree_root->nodesize, - list_empty(&re->extctl), re->scheduled_for ? 
- re->scheduled_for->devid : -1); + list_empty(&re->extctl), re->scheduled); for (i = 0; i < re->nzones; ++i) { printk(KERN_CONT " zone %llu-%llu devs", @@ -863,7 +864,7 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all) } } printk(KERN_CONT "\n"); - index = (re->logical >> PAGE_CACHE_SHIFT) + 1; + index = (re->logical >> PAGE_SHIFT) + 1; if (++cnt > 15) break; } @@ -878,31 +879,25 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all) index, 1); if (ret == 0) break; - if (!re->scheduled_for) { - index = (re->logical >> PAGE_CACHE_SHIFT) + 1; + if (!re->scheduled) { + index = (re->logical >> PAGE_SHIFT) + 1; continue; } printk(KERN_DEBUG - "re: logical %llu size %u list empty %d for %lld", + "re: logical %llu size %u list empty %d scheduled %d", re->logical, fs_info->tree_root->nodesize, - list_empty(&re->extctl), - re->scheduled_for ? re->scheduled_for->devid : -1); + list_empty(&re->extctl), re->scheduled); for (i = 0; i < re->nzones; ++i) { printk(KERN_CONT " zone %llu-%llu devs", re->zones[i]->start, re->zones[i]->end); - for (i = 0; i < re->nzones; ++i) { - printk(KERN_CONT " zone %llu-%llu devs", - re->zones[i]->start, - re->zones[i]->end); - for (j = 0; j < re->zones[i]->ndevs; ++j) { - printk(KERN_CONT " %lld", - re->zones[i]->devs[j]->devid); - } + for (j = 0; j < re->zones[i]->ndevs; ++j) { + printk(KERN_CONT " %lld", + re->zones[i]->devs[j]->devid); } } printk(KERN_CONT "\n"); - index = (re->logical >> PAGE_CACHE_SHIFT) + 1; + index = (re->logical >> PAGE_SHIFT) + 1; } spin_unlock(&fs_info->reada_lock); } @@ -917,7 +912,6 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root, struct reada_control *rc; u64 start; u64 generation; - int level; int ret; struct extent_buffer *node; static struct btrfs_key max_key = { @@ -926,7 +920,7 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root, .offset = (u64)-1 }; - rc = kzalloc(sizeof(*rc), GFP_NOFS); + rc = kzalloc(sizeof(*rc), GFP_KERNEL); if (!rc) return 
ERR_PTR(-ENOMEM); @@ -940,11 +934,10 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root, node = btrfs_root_node(root); start = node->start; - level = btrfs_header_level(node); generation = btrfs_header_generation(node); free_extent_buffer(node); - ret = reada_add_block(rc, start, &max_key, level, generation); + ret = reada_add_block(rc, start, &max_key, generation); if (ret) { kfree(rc); return ERR_PTR(ret); @@ -959,8 +952,11 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root, int btrfs_reada_wait(void *handle) { struct reada_control *rc = handle; + struct btrfs_fs_info *fs_info = rc->root->fs_info; while (atomic_read(&rc->elems)) { + if (!atomic_read(&fs_info->reada_works_cnt)) + reada_start_machine(fs_info); wait_event_timeout(rc->wait, atomic_read(&rc->elems) == 0, 5 * HZ); dump_devs(rc->root->fs_info, @@ -977,9 +973,13 @@ int btrfs_reada_wait(void *handle) int btrfs_reada_wait(void *handle) { struct reada_control *rc = handle; + struct btrfs_fs_info *fs_info = rc->root->fs_info; while (atomic_read(&rc->elems)) { - wait_event(rc->wait, atomic_read(&rc->elems) == 0); + if (!atomic_read(&fs_info->reada_works_cnt)) + reada_start_machine(fs_info); + wait_event_timeout(rc->wait, atomic_read(&rc->elems) == 0, + (HZ + 9) / 10); } kref_put(&rc->refcnt, reada_control_release); diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 2bd001145..08ef890de 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1850,6 +1850,7 @@ again: eb = read_tree_block(dest, old_bytenr, old_ptr_gen); if (IS_ERR(eb)) { ret = PTR_ERR(eb); + break; } else if (!extent_buffer_uptodate(eb)) { ret = -EIO; free_extent_buffer(eb); @@ -3129,10 +3130,10 @@ static int relocate_file_extent_cluster(struct inode *inode, if (ret) goto out; - index = (cluster->start - offset) >> PAGE_CACHE_SHIFT; - last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT; + index = (cluster->start - offset) >> PAGE_SHIFT; + last_index = (cluster->end - offset) >> 
PAGE_SHIFT; while (index <= last_index) { - ret = btrfs_delalloc_reserve_metadata(inode, PAGE_CACHE_SIZE); + ret = btrfs_delalloc_reserve_metadata(inode, PAGE_SIZE); if (ret) goto out; @@ -3145,7 +3146,7 @@ static int relocate_file_extent_cluster(struct inode *inode, mask); if (!page) { btrfs_delalloc_release_metadata(inode, - PAGE_CACHE_SIZE); + PAGE_SIZE); ret = -ENOMEM; goto out; } @@ -3162,16 +3163,16 @@ static int relocate_file_extent_cluster(struct inode *inode, lock_page(page); if (!PageUptodate(page)) { unlock_page(page); - page_cache_release(page); + put_page(page); btrfs_delalloc_release_metadata(inode, - PAGE_CACHE_SIZE); + PAGE_SIZE); ret = -EIO; goto out; } } page_start = page_offset(page); - page_end = page_start + PAGE_CACHE_SIZE - 1; + page_end = page_start + PAGE_SIZE - 1; lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end); @@ -3191,7 +3192,7 @@ static int relocate_file_extent_cluster(struct inode *inode, unlock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end); unlock_page(page); - page_cache_release(page); + put_page(page); index++; balance_dirty_pages_ratelimited(inode->i_mapping); diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 2c849b08a..9fcd6dfc3 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -496,7 +496,7 @@ void btrfs_update_root_times(struct btrfs_trans_handle *trans, struct btrfs_root *root) { struct btrfs_root_item *item = &root->root_item; - struct timespec ct = CURRENT_TIME; + struct timespec ct = current_fs_time(root->fs_info->sb); spin_lock(&root->root_item_lock); btrfs_set_root_ctransid(item, trans->transid); diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 92bf5ee73..4678f03e8 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -461,7 +461,7 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace) struct btrfs_fs_info *fs_info = dev->dev_root->fs_info; int ret; - sctx = kzalloc(sizeof(*sctx), GFP_NOFS); + sctx = kzalloc(sizeof(*sctx), GFP_KERNEL); if 
(!sctx) goto nomem; atomic_set(&sctx->refs, 1); @@ -472,7 +472,7 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace) for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) { struct scrub_bio *sbio; - sbio = kzalloc(sizeof(*sbio), GFP_NOFS); + sbio = kzalloc(sizeof(*sbio), GFP_KERNEL); if (!sbio) goto nomem; sctx->bios[i] = sbio; @@ -611,7 +611,7 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) u64 flags = 0; u64 ref_root; u32 item_size; - u8 ref_level; + u8 ref_level = 0; int ret; WARN_ON(sblock->page_count < 1); @@ -703,7 +703,7 @@ static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx) if (IS_ERR(inode)) return PTR_ERR(inode); - index = offset >> PAGE_CACHE_SHIFT; + index = offset >> PAGE_SHIFT; page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); if (!page) { @@ -1636,7 +1636,7 @@ static int scrub_write_page_to_dev_replace(struct scrub_block *sblock, if (spage->io_error) { void *mapped_buffer = kmap_atomic(spage->page); - memset(mapped_buffer, 0, PAGE_CACHE_SIZE); + memset(mapped_buffer, 0, PAGE_SIZE); flush_dcache_page(spage->page); kunmap_atomic(mapped_buffer); } @@ -1654,7 +1654,7 @@ static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx, again: if (!wr_ctx->wr_curr_bio) { wr_ctx->wr_curr_bio = kzalloc(sizeof(*wr_ctx->wr_curr_bio), - GFP_NOFS); + GFP_KERNEL); if (!wr_ctx->wr_curr_bio) { mutex_unlock(&wr_ctx->wr_lock); return -ENOMEM; @@ -1671,7 +1671,8 @@ again: sbio->dev = wr_ctx->tgtdev; bio = sbio->bio; if (!bio) { - bio = btrfs_io_bio_alloc(GFP_NOFS, wr_ctx->pages_per_wr_bio); + bio = btrfs_io_bio_alloc(GFP_KERNEL, + wr_ctx->pages_per_wr_bio); if (!bio) { mutex_unlock(&wr_ctx->wr_lock); return -ENOMEM; @@ -2076,7 +2077,8 @@ again: sbio->dev = spage->dev; bio = sbio->bio; if (!bio) { - bio = btrfs_io_bio_alloc(GFP_NOFS, sctx->pages_per_rd_bio); + bio = btrfs_io_bio_alloc(GFP_KERNEL, + sctx->pages_per_rd_bio); if (!bio) return -ENOMEM; sbio->bio = bio; @@ -2241,7 
+2243,7 @@ static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len, struct scrub_block *sblock; int index; - sblock = kzalloc(sizeof(*sblock), GFP_NOFS); + sblock = kzalloc(sizeof(*sblock), GFP_KERNEL); if (!sblock) { spin_lock(&sctx->stat_lock); sctx->stat.malloc_errors++; @@ -2259,7 +2261,7 @@ static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len, struct scrub_page *spage; u64 l = min_t(u64, len, PAGE_SIZE); - spage = kzalloc(sizeof(*spage), GFP_NOFS); + spage = kzalloc(sizeof(*spage), GFP_KERNEL); if (!spage) { leave_nomem: spin_lock(&sctx->stat_lock); @@ -2286,7 +2288,7 @@ leave_nomem: spage->have_csum = 0; } sblock->page_count++; - spage->page = alloc_page(GFP_NOFS); + spage->page = alloc_page(GFP_KERNEL); if (!spage->page) goto leave_nomem; len -= l; @@ -2541,7 +2543,7 @@ static int scrub_pages_for_parity(struct scrub_parity *sparity, struct scrub_block *sblock; int index; - sblock = kzalloc(sizeof(*sblock), GFP_NOFS); + sblock = kzalloc(sizeof(*sblock), GFP_KERNEL); if (!sblock) { spin_lock(&sctx->stat_lock); sctx->stat.malloc_errors++; @@ -2561,7 +2563,7 @@ static int scrub_pages_for_parity(struct scrub_parity *sparity, struct scrub_page *spage; u64 l = min_t(u64, len, PAGE_SIZE); - spage = kzalloc(sizeof(*spage), GFP_NOFS); + spage = kzalloc(sizeof(*spage), GFP_KERNEL); if (!spage) { leave_nomem: spin_lock(&sctx->stat_lock); @@ -2591,7 +2593,7 @@ leave_nomem: spage->have_csum = 0; } sblock->page_count++; - spage->page = alloc_page(GFP_NOFS); + spage->page = alloc_page(GFP_KERNEL); if (!spage->page) goto leave_nomem; len -= l; @@ -3857,16 +3859,16 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, return -EIO; } - btrfs_dev_replace_lock(&fs_info->dev_replace); + btrfs_dev_replace_lock(&fs_info->dev_replace, 0); if (dev->scrub_device || (!is_dev_replace && btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))) { - btrfs_dev_replace_unlock(&fs_info->dev_replace); + 
btrfs_dev_replace_unlock(&fs_info->dev_replace, 0); mutex_unlock(&fs_info->scrub_lock); mutex_unlock(&fs_info->fs_devices->device_list_mutex); return -EINPROGRESS; } - btrfs_dev_replace_unlock(&fs_info->dev_replace); + btrfs_dev_replace_unlock(&fs_info->dev_replace, 0); ret = scrub_workers_get(fs_info, is_dev_replace); if (ret) { @@ -4292,8 +4294,8 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, goto out; } - while (len >= PAGE_CACHE_SIZE) { - index = offset >> PAGE_CACHE_SHIFT; + while (len >= PAGE_SIZE) { + index = offset >> PAGE_SHIFT; again: page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); if (!page) { @@ -4324,7 +4326,7 @@ again: */ if (page->mapping != inode->i_mapping) { unlock_page(page); - page_cache_release(page); + put_page(page); goto again; } if (!PageUptodate(page)) { @@ -4346,15 +4348,15 @@ again: ret = err; next_page: unlock_page(page); - page_cache_release(page); + put_page(page); if (ret) break; - offset += PAGE_CACHE_SIZE; - physical_for_dev_replace += PAGE_CACHE_SIZE; - nocow_ctx_logical += PAGE_CACHE_SIZE; - len -= PAGE_CACHE_SIZE; + offset += PAGE_SIZE; + physical_for_dev_replace += PAGE_SIZE; + nocow_ctx_logical += PAGE_SIZE; + len -= PAGE_SIZE; } ret = COPY_COMPLETE; out: @@ -4388,8 +4390,8 @@ static int write_page_nocow(struct scrub_ctx *sctx, bio->bi_iter.bi_size = 0; bio->bi_iter.bi_sector = physical_for_dev_replace >> 9; bio->bi_bdev = dev->bdev; - ret = bio_add_page(bio, page, PAGE_CACHE_SIZE, 0); - if (ret != PAGE_CACHE_SIZE) { + ret = bio_add_page(bio, page, PAGE_SIZE, 0); + if (ret != PAGE_SIZE) { leave_with_eio: bio_put(bio); btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS); diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 63a6152be..8d358c547 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -34,6 +34,7 @@ #include "disk-io.h" #include "btrfs_inode.h" #include "transaction.h" +#include "compression.h" static int g_verbose = 0; @@ -304,7 +305,7 @@ static struct fs_path 
*fs_path_alloc(void) { struct fs_path *p; - p = kmalloc(sizeof(*p), GFP_NOFS); + p = kmalloc(sizeof(*p), GFP_KERNEL); if (!p) return NULL; p->reversed = 0; @@ -363,11 +364,11 @@ static int fs_path_ensure_buf(struct fs_path *p, int len) * First time the inline_buf does not suffice */ if (p->buf == p->inline_buf) { - tmp_buf = kmalloc(len, GFP_NOFS); + tmp_buf = kmalloc(len, GFP_KERNEL); if (tmp_buf) memcpy(tmp_buf, p->buf, old_buf_len); } else { - tmp_buf = krealloc(p->buf, len, GFP_NOFS); + tmp_buf = krealloc(p->buf, len, GFP_KERNEL); } if (!tmp_buf) return -ENOMEM; @@ -995,7 +996,7 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, * values are small. */ buf_len = PATH_MAX; - buf = kmalloc(buf_len, GFP_NOFS); + buf = kmalloc(buf_len, GFP_KERNEL); if (!buf) { ret = -ENOMEM; goto out; @@ -1042,7 +1043,7 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, buf = NULL; } else { char *tmp = krealloc(buf, buf_len, - GFP_NOFS | __GFP_NOWARN); + GFP_KERNEL | __GFP_NOWARN); if (!tmp) kfree(buf); @@ -1303,7 +1304,7 @@ static int find_extent_clone(struct send_ctx *sctx, /* We only use this path under the commit sem */ tmp_path->need_commit_sem = 0; - backref_ctx = kmalloc(sizeof(*backref_ctx), GFP_NOFS); + backref_ctx = kmalloc(sizeof(*backref_ctx), GFP_KERNEL); if (!backref_ctx) { ret = -ENOMEM; goto out; @@ -1984,7 +1985,7 @@ static int name_cache_insert(struct send_ctx *sctx, nce_head = radix_tree_lookup(&sctx->name_cache, (unsigned long)nce->ino); if (!nce_head) { - nce_head = kmalloc(sizeof(*nce_head), GFP_NOFS); + nce_head = kmalloc(sizeof(*nce_head), GFP_KERNEL); if (!nce_head) { kfree(nce); return -ENOMEM; @@ -2179,7 +2180,7 @@ out_cache: /* * Store the result of the lookup in the name cache. 
*/ - nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_NOFS); + nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_KERNEL); if (!nce) { ret = -ENOMEM; goto out; @@ -2315,7 +2316,7 @@ static int send_subvol_begin(struct send_ctx *sctx) if (!path) return -ENOMEM; - name = kmalloc(BTRFS_PATH_NAME_MAX, GFP_NOFS); + name = kmalloc(BTRFS_PATH_NAME_MAX, GFP_KERNEL); if (!name) { btrfs_free_path(path); return -ENOMEM; @@ -2730,7 +2731,7 @@ static int __record_ref(struct list_head *head, u64 dir, { struct recorded_ref *ref; - ref = kmalloc(sizeof(*ref), GFP_NOFS); + ref = kmalloc(sizeof(*ref), GFP_KERNEL); if (!ref) return -ENOMEM; @@ -2755,7 +2756,7 @@ static int dup_ref(struct recorded_ref *ref, struct list_head *list) { struct recorded_ref *new; - new = kmalloc(sizeof(*ref), GFP_NOFS); + new = kmalloc(sizeof(*ref), GFP_KERNEL); if (!new) return -ENOMEM; @@ -2818,7 +2819,7 @@ add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino) struct rb_node *parent = NULL; struct orphan_dir_info *entry, *odi; - odi = kmalloc(sizeof(*odi), GFP_NOFS); + odi = kmalloc(sizeof(*odi), GFP_KERNEL); if (!odi) return ERR_PTR(-ENOMEM); odi->ino = dir_ino; @@ -2973,7 +2974,7 @@ static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino, bool orphanized) struct rb_node *parent = NULL; struct waiting_dir_move *entry, *dm; - dm = kmalloc(sizeof(*dm), GFP_NOFS); + dm = kmalloc(sizeof(*dm), GFP_KERNEL); if (!dm) return -ENOMEM; dm->ino = ino; @@ -3040,7 +3041,7 @@ static int add_pending_dir_move(struct send_ctx *sctx, int exists = 0; int ret; - pm = kmalloc(sizeof(*pm), GFP_NOFS); + pm = kmalloc(sizeof(*pm), GFP_KERNEL); if (!pm) return -ENOMEM; pm->parent_ino = parent_ino; @@ -4280,7 +4281,7 @@ static int __find_xattr(int num, struct btrfs_key *di_key, strncmp(name, ctx->name, name_len) == 0) { ctx->found_idx = num; ctx->found_data_len = data_len; - ctx->found_data = kmemdup(data, data_len, GFP_NOFS); + ctx->found_data = kmemdup(data, data_len, GFP_KERNEL); if (!ctx->found_data) 
return -ENOMEM; return 1; @@ -4448,9 +4449,9 @@ static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len) struct page *page; char *addr; struct btrfs_key key; - pgoff_t index = offset >> PAGE_CACHE_SHIFT; + pgoff_t index = offset >> PAGE_SHIFT; pgoff_t last_index; - unsigned pg_offset = offset & ~PAGE_CACHE_MASK; + unsigned pg_offset = offset & ~PAGE_MASK; ssize_t ret = 0; key.objectid = sctx->cur_ino; @@ -4470,7 +4471,7 @@ static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len) if (len == 0) goto out; - last_index = (offset + len - 1) >> PAGE_CACHE_SHIFT; + last_index = (offset + len - 1) >> PAGE_SHIFT; /* initial readahead */ memset(&sctx->ra, 0, sizeof(struct file_ra_state)); @@ -4480,8 +4481,8 @@ static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len) while (index <= last_index) { unsigned cur_len = min_t(unsigned, len, - PAGE_CACHE_SIZE - pg_offset); - page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); + PAGE_SIZE - pg_offset); + page = find_or_create_page(inode->i_mapping, index, GFP_KERNEL); if (!page) { ret = -ENOMEM; break; @@ -4492,7 +4493,7 @@ static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len) lock_page(page); if (!PageUptodate(page)) { unlock_page(page); - page_cache_release(page); + put_page(page); ret = -EIO; break; } @@ -4502,7 +4503,7 @@ static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len) memcpy(sctx->read_buf + ret, addr + pg_offset, cur_len); kunmap(page); unlock_page(page); - page_cache_release(page); + put_page(page); index++; pg_offset = 0; len -= cur_len; @@ -4803,7 +4804,7 @@ static int clone_range(struct send_ctx *sctx, type = btrfs_file_extent_type(leaf, ei); if (type == BTRFS_FILE_EXTENT_INLINE) { ext_len = btrfs_file_extent_inline_len(leaf, slot, ei); - ext_len = PAGE_CACHE_ALIGN(ext_len); + ext_len = PAGE_ALIGN(ext_len); } else { ext_len = btrfs_file_extent_num_bytes(leaf, ei); } @@ -4885,7 +4886,7 @@ static int 
send_write_or_clone(struct send_ctx *sctx, * but there may be items after this page. Make * sure to send the whole thing */ - len = PAGE_CACHE_ALIGN(len); + len = PAGE_ALIGN(len); } else { len = btrfs_file_extent_num_bytes(path->nodes[0], ei); } @@ -5989,7 +5990,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) goto out; } - sctx = kzalloc(sizeof(struct send_ctx), GFP_NOFS); + sctx = kzalloc(sizeof(struct send_ctx), GFP_KERNEL); if (!sctx) { ret = -ENOMEM; goto out; @@ -5997,7 +5998,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) INIT_LIST_HEAD(&sctx->new_refs); INIT_LIST_HEAD(&sctx->deleted_refs); - INIT_RADIX_TREE(&sctx->name_cache, GFP_NOFS); + INIT_RADIX_TREE(&sctx->name_cache, GFP_KERNEL); INIT_LIST_HEAD(&sctx->name_cache_list); sctx->flags = arg->flags; diff --git a/fs/btrfs/struct-funcs.c b/fs/btrfs/struct-funcs.c index b976597b0..e05619f24 100644 --- a/fs/btrfs/struct-funcs.c +++ b/fs/btrfs/struct-funcs.c @@ -66,7 +66,7 @@ u##bits btrfs_get_token_##bits(struct extent_buffer *eb, void *ptr, \ \ if (token && token->kaddr && token->offset <= offset && \ token->eb == eb && \ - (token->offset + PAGE_CACHE_SIZE >= offset + size)) { \ + (token->offset + PAGE_SIZE >= offset + size)) { \ kaddr = token->kaddr; \ p = kaddr + part_offset - token->offset; \ res = get_unaligned_le##bits(p + off); \ @@ -104,7 +104,7 @@ void btrfs_set_token_##bits(struct extent_buffer *eb, \ \ if (token && token->kaddr && token->offset <= offset && \ token->eb == eb && \ - (token->offset + PAGE_CACHE_SIZE >= offset + size)) { \ + (token->offset + PAGE_SIZE >= offset + size)) { \ kaddr = token->kaddr; \ p = kaddr + part_offset - token->offset; \ put_unaligned_le##bits(val, p + off); \ diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index d41e09fe8..00b8f37cc 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -303,7 +303,8 @@ enum { Opt_check_integrity_print_mask, Opt_fatal_errors, Opt_rescan_uuid_tree, Opt_commit_interval, Opt_barrier, 
Opt_nodefrag, Opt_nodiscard, Opt_noenospc_debug, Opt_noflushoncommit, Opt_acl, Opt_datacow, - Opt_datasum, Opt_treelog, Opt_noinode_cache, + Opt_datasum, Opt_treelog, Opt_noinode_cache, Opt_usebackuproot, + Opt_nologreplay, Opt_norecovery, #ifdef CONFIG_BTRFS_DEBUG Opt_fragment_data, Opt_fragment_metadata, Opt_fragment_all, #endif @@ -335,6 +336,8 @@ static const match_table_t tokens = { {Opt_noacl, "noacl"}, {Opt_notreelog, "notreelog"}, {Opt_treelog, "treelog"}, + {Opt_nologreplay, "nologreplay"}, + {Opt_norecovery, "norecovery"}, {Opt_flushoncommit, "flushoncommit"}, {Opt_noflushoncommit, "noflushoncommit"}, {Opt_ratio, "metadata_ratio=%d"}, @@ -352,7 +355,8 @@ static const match_table_t tokens = { {Opt_inode_cache, "inode_cache"}, {Opt_noinode_cache, "noinode_cache"}, {Opt_no_space_cache, "nospace_cache"}, - {Opt_recovery, "recovery"}, + {Opt_recovery, "recovery"}, /* deprecated */ + {Opt_usebackuproot, "usebackuproot"}, {Opt_skip_balance, "skip_balance"}, {Opt_check_integrity, "check_int"}, {Opt_check_integrity_including_extent_data, "check_int_data"}, @@ -373,7 +377,8 @@ static const match_table_t tokens = { * reading in a new superblock is parsed here. * XXX JDM: This needs to be cleaned up for remount. 
*/ -int btrfs_parse_options(struct btrfs_root *root, char *options) +int btrfs_parse_options(struct btrfs_root *root, char *options, + unsigned long new_flags) { struct btrfs_fs_info *info = root->fs_info; substring_t args[MAX_OPT_ARGS]; @@ -393,8 +398,12 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) else if (cache_gen) btrfs_set_opt(info->mount_opt, SPACE_CACHE); + /* + * Even if the options are empty, we still need to do an extra check + * against the new flags + */ if (!options) - goto out; + goto check; /* * strsep changes the string, duplicate it because parse_options @@ -606,6 +615,11 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) btrfs_clear_and_info(root, NOTREELOG, "enabling tree log"); break; + case Opt_norecovery: + case Opt_nologreplay: + btrfs_set_and_info(root, NOLOGREPLAY, + "disabling log replay at mount time"); + break; case Opt_flushoncommit: btrfs_set_and_info(root, FLUSHONCOMMIT, "turning on flush-on-commit"); @@ -696,8 +710,12 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) "disabling auto defrag"); break; case Opt_recovery: - btrfs_info(root->fs_info, "enabling auto recovery"); - btrfs_set_opt(info->mount_opt, RECOVERY); + btrfs_warn(root->fs_info, + "'recovery' is deprecated, use 'usebackuproot' instead"); + case Opt_usebackuproot: + btrfs_info(root->fs_info, + "trying to use backup root at mount time"); + btrfs_set_opt(info->mount_opt, USEBACKUPROOT); break; case Opt_skip_balance: btrfs_set_opt(info->mount_opt, SKIP_BALANCE); @@ -792,6 +810,15 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) break; } } +check: + /* + * Extra check for current option against current flag + */ + if (btrfs_test_opt(root, NOLOGREPLAY) && !(new_flags & MS_RDONLY)) { + btrfs_err(root->fs_info, + "nologreplay must be used with ro mount option"); + ret = -EINVAL; + } out: if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE) && !btrfs_test_opt(root, FREE_SPACE_TREE) && @@ -1202,6 +1229,8 @@ static
int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) seq_puts(seq, ",ssd"); if (btrfs_test_opt(root, NOTREELOG)) seq_puts(seq, ",notreelog"); + if (btrfs_test_opt(root, NOLOGREPLAY)) + seq_puts(seq, ",nologreplay"); if (btrfs_test_opt(root, FLUSHONCOMMIT)) seq_puts(seq, ",flushoncommit"); if (btrfs_test_opt(root, DISCARD)) @@ -1228,8 +1257,6 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) seq_puts(seq, ",inode_cache"); if (btrfs_test_opt(root, SKIP_BALANCE)) seq_puts(seq, ",skip_balance"); - if (btrfs_test_opt(root, RECOVERY)) - seq_puts(seq, ",recovery"); #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY if (btrfs_test_opt(root, CHECK_INTEGRITY_INCLUDING_EXTENT_DATA)) seq_puts(seq, ",check_int_data"); @@ -1685,7 +1712,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) } } - ret = btrfs_parse_options(root, data); + ret = btrfs_parse_options(root, data, *flags); if (ret) { ret = -EINVAL; goto restore; @@ -2163,6 +2190,9 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd, break; ret = !(fs_devices->num_devices == fs_devices->total_devices); break; + case BTRFS_IOC_GET_SUPPORTED_FEATURES: + ret = btrfs_ioctl_get_supported_features((void __user*)arg); + break; } kfree(vol); @@ -2261,7 +2291,7 @@ static void btrfs_interface_exit(void) misc_deregister(&btrfs_misc); } -static void btrfs_print_info(void) +static void btrfs_print_mod_info(void) { printk(KERN_INFO "Btrfs loaded" #ifdef CONFIG_BTRFS_DEBUG @@ -2363,7 +2393,7 @@ static int __init init_btrfs_fs(void) btrfs_init_lockdep(); - btrfs_print_info(); + btrfs_print_mod_info(); err = btrfs_run_sanity_tests(); if (err) diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c index d39f714da..f54bf450b 100644 --- a/fs/btrfs/tests/btrfs-tests.c +++ b/fs/btrfs/tests/btrfs-tests.c @@ -137,7 +137,6 @@ static void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info) void **slot; spin_lock(&fs_info->buffer_lock); -restart: 
radix_tree_for_each_slot(slot, &fs_info->buffer_radix, &iter, 0) { struct extent_buffer *eb; @@ -147,7 +146,7 @@ restart: /* Shouldn't happen but that kind of thinking creates CVE's */ if (radix_tree_exception(eb)) { if (radix_tree_deref_retry(eb)) - goto restart; + slot = radix_tree_iter_retry(&iter); continue; } spin_unlock(&fs_info->buffer_lock); diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c index 669b58201..70948b13b 100644 --- a/fs/btrfs/tests/extent-io-tests.c +++ b/fs/btrfs/tests/extent-io-tests.c @@ -32,8 +32,8 @@ static noinline int process_page_range(struct inode *inode, u64 start, u64 end, { int ret; struct page *pages[16]; - unsigned long index = start >> PAGE_CACHE_SHIFT; - unsigned long end_index = end >> PAGE_CACHE_SHIFT; + unsigned long index = start >> PAGE_SHIFT; + unsigned long end_index = end >> PAGE_SHIFT; unsigned long nr_pages = end_index - index + 1; int i; int count = 0; @@ -49,9 +49,9 @@ static noinline int process_page_range(struct inode *inode, u64 start, u64 end, count++; if (flags & PROCESS_UNLOCK && PageLocked(pages[i])) unlock_page(pages[i]); - page_cache_release(pages[i]); + put_page(pages[i]); if (flags & PROCESS_RELEASE) - page_cache_release(pages[i]); + put_page(pages[i]); } nr_pages -= ret; index += ret; @@ -93,7 +93,7 @@ static int test_find_delalloc(void) * everything to make sure our pages don't get evicted and screw up our * test. 
*/ - for (index = 0; index < (total_dirty >> PAGE_CACHE_SHIFT); index++) { + for (index = 0; index < (total_dirty >> PAGE_SHIFT); index++) { page = find_or_create_page(inode->i_mapping, index, GFP_KERNEL); if (!page) { test_msg("Failed to allocate test page\n"); @@ -104,7 +104,7 @@ static int test_find_delalloc(void) if (index) { unlock_page(page); } else { - page_cache_get(page); + get_page(page); locked_page = page; } } @@ -129,7 +129,7 @@ static int test_find_delalloc(void) } unlock_extent(&tmp, start, end); unlock_page(locked_page); - page_cache_release(locked_page); + put_page(locked_page); /* * Test this scenario @@ -139,7 +139,7 @@ static int test_find_delalloc(void) */ test_start = SZ_64M; locked_page = find_lock_page(inode->i_mapping, - test_start >> PAGE_CACHE_SHIFT); + test_start >> PAGE_SHIFT); if (!locked_page) { test_msg("Couldn't find the locked page\n"); goto out_bits; @@ -165,7 +165,7 @@ static int test_find_delalloc(void) } unlock_extent(&tmp, start, end); /* locked_page was unlocked above */ - page_cache_release(locked_page); + put_page(locked_page); /* * Test this scenario @@ -174,7 +174,7 @@ static int test_find_delalloc(void) */ test_start = max_bytes + 4096; locked_page = find_lock_page(inode->i_mapping, test_start >> - PAGE_CACHE_SHIFT); + PAGE_SHIFT); if (!locked_page) { test_msg("Could'nt find the locked page\n"); goto out_bits; @@ -225,13 +225,13 @@ static int test_find_delalloc(void) * range we want to find. */ page = find_get_page(inode->i_mapping, - (max_bytes + SZ_1M) >> PAGE_CACHE_SHIFT); + (max_bytes + SZ_1M) >> PAGE_SHIFT); if (!page) { test_msg("Couldn't find our page\n"); goto out_bits; } ClearPageDirty(page); - page_cache_release(page); + put_page(page); /* We unlocked it in the previous test */ lock_page(locked_page); @@ -239,7 +239,7 @@ static int test_find_delalloc(void) end = 0; /* * Currently if we fail to find dirty pages in the delalloc range we - * will adjust max_bytes down to PAGE_CACHE_SIZE and then re-search. 
If + * will adjust max_bytes down to PAGE_SIZE and then re-search. If * this changes at any point in the future we will need to fix this * tests expected behavior. */ @@ -249,9 +249,9 @@ static int test_find_delalloc(void) test_msg("Didn't find our range\n"); goto out_bits; } - if (start != test_start && end != test_start + PAGE_CACHE_SIZE - 1) { + if (start != test_start && end != test_start + PAGE_SIZE - 1) { test_msg("Expected start %Lu end %Lu, got start %Lu end %Lu\n", - test_start, test_start + PAGE_CACHE_SIZE - 1, start, + test_start, test_start + PAGE_SIZE - 1, start, end); goto out_bits; } @@ -265,7 +265,7 @@ out_bits: clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1, GFP_KERNEL); out: if (locked_page) - page_cache_release(locked_page); + put_page(locked_page); process_page_range(inode, 0, total_dirty - 1, PROCESS_UNLOCK | PROCESS_RELEASE); iput(inode); @@ -298,9 +298,9 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb, return -EINVAL; } - bitmap_set(bitmap, (PAGE_CACHE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE, + bitmap_set(bitmap, (PAGE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE, sizeof(long) * BITS_PER_BYTE); - extent_buffer_bitmap_set(eb, PAGE_CACHE_SIZE - sizeof(long) / 2, 0, + extent_buffer_bitmap_set(eb, PAGE_SIZE - sizeof(long) / 2, 0, sizeof(long) * BITS_PER_BYTE); if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) { test_msg("Setting straddling pages failed\n"); @@ -309,10 +309,10 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb, bitmap_set(bitmap, 0, len * BITS_PER_BYTE); bitmap_clear(bitmap, - (PAGE_CACHE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE, + (PAGE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE, sizeof(long) * BITS_PER_BYTE); extent_buffer_bitmap_set(eb, 0, 0, len * BITS_PER_BYTE); - extent_buffer_bitmap_clear(eb, PAGE_CACHE_SIZE - sizeof(long) / 2, 0, + extent_buffer_bitmap_clear(eb, PAGE_SIZE - sizeof(long) / 2, 0, sizeof(long) * BITS_PER_BYTE); if (memcmp_extent_buffer(eb, 
bitmap, 0, len) != 0) { test_msg("Clearing straddling pages failed\n"); @@ -353,7 +353,7 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb, static int test_eb_bitmaps(void) { - unsigned long len = PAGE_CACHE_SIZE * 4; + unsigned long len = PAGE_SIZE * 4; unsigned long *bitmap; struct extent_buffer *eb; int ret; @@ -379,7 +379,7 @@ static int test_eb_bitmaps(void) /* Do it over again with an extent buffer which isn't page-aligned. */ free_extent_buffer(eb); - eb = __alloc_dummy_extent_buffer(NULL, PAGE_CACHE_SIZE / 2, len); + eb = __alloc_dummy_extent_buffer(NULL, PAGE_SIZE / 2, len); if (!eb) { test_msg("Couldn't allocate test extent buffer\n"); kfree(bitmap); diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c index c9ad97b1e..514247515 100644 --- a/fs/btrfs/tests/free-space-tests.c +++ b/fs/btrfs/tests/free-space-tests.c @@ -22,7 +22,7 @@ #include "../disk-io.h" #include "../free-space-cache.h" -#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) +#define BITS_PER_BITMAP (PAGE_SIZE * 8) /* * This test just does basic sanity checking, making sure we can add an exten diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c index e2d3da02d..863a6a3af 100644 --- a/fs/btrfs/tests/inode-tests.c +++ b/fs/btrfs/tests/inode-tests.c @@ -22,6 +22,7 @@ #include "../disk-io.h" #include "../extent_io.h" #include "../volumes.h" +#include "../compression.h" static void insert_extent(struct btrfs_root *root, u64 start, u64 len, u64 ram_bytes, u64 offset, u64 disk_bytenr, diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index b6031ce47..43885e51b 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -637,6 +637,8 @@ struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv( trans->block_rsv = &root->fs_info->trans_block_rsv; trans->bytes_reserved = num_bytes; + trace_btrfs_space_reservation(root->fs_info, "transaction", + trans->transid, num_bytes, 1); return trans; } @@ 
-1333,7 +1335,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, struct dentry *dentry; struct extent_buffer *tmp; struct extent_buffer *old; - struct timespec cur_time = CURRENT_TIME; + struct timespec cur_time; int ret = 0; u64 to_reserve = 0; u64 index = 0; @@ -1375,12 +1377,16 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, rsv = trans->block_rsv; trans->block_rsv = &pending->block_rsv; trans->bytes_reserved = trans->block_rsv->reserved; - + trace_btrfs_space_reservation(root->fs_info, "transaction", + trans->transid, + trans->bytes_reserved, 1); dentry = pending->dentry; parent_inode = pending->dir; parent_root = BTRFS_I(parent_inode)->root; record_root_in_trans(trans, parent_root); + cur_time = current_fs_time(parent_inode->i_sb); + /* * insert the directory item */ @@ -1523,7 +1529,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, btrfs_i_size_write(parent_inode, parent_inode->i_size + dentry->d_name.len * 2); - parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; + parent_inode->i_mtime = parent_inode->i_ctime = + current_fs_time(parent_inode->i_sb); ret = btrfs_update_inode_fallback(trans, parent_root, parent_inode); if (ret) { btrfs_abort_transaction(trans, root, ret); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 849a30aa1..517d0ccb3 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -26,6 +26,7 @@ #include "print-tree.h" #include "backref.h" #include "hash.h" +#include "compression.h" /* magic values for the inode_only field in btrfs_log_inode: * @@ -1045,7 +1046,7 @@ again: /* * NOTE: we have searched root tree and checked the - * coresponding ref, it does not need to check again. + * corresponding ref, it does not need to check again. 
*/ *search_done = 1; } @@ -4621,7 +4622,22 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, mutex_lock(&BTRFS_I(inode)->log_mutex); - btrfs_get_logged_extents(inode, &logged_list, start, end); + /* + * Collect ordered extents only if we are logging data. This is to + * ensure a subsequent request to log this inode in LOG_INODE_ALL mode + * will process the ordered extents if they still exist at the time, + * because when we collect them we test and set for the flag + * BTRFS_ORDERED_LOGGED to prevent multiple log requests to process the + * same ordered extents. The consequence for the LOG_INODE_ALL log mode + * not processing the ordered extents is that we end up logging the + * corresponding file extent items, based on the extent maps in the + * inode's extent_map_tree's modified_list, without logging the + * respective checksums (since they may still be only attached to the + * ordered extents and have not been inserted in the csum tree by + * btrfs_finish_ordered_io() yet). + */ + if (inode_only == LOG_INODE_ALL) + btrfs_get_logged_extents(inode, &logged_list, start, end); /* * a brute force approach to making sure we get the most uptodate @@ -4909,6 +4925,42 @@ out_unlock: } /* + * Check if we must fall back to a transaction commit when logging an inode. + * This must be called after logging the inode and is used only in the context + * when fsyncing an inode requires the need to log some other inode - in which + * case we can't lock the i_mutex of each other inode we need to log as that + * can lead to deadlocks with concurrent fsync against other inodes (as we can + * log inodes up or down in the hierarchy) or rename operations for example.
So + * we take the log_mutex of the inode after we have logged it and then check for + * its last_unlink_trans value - this is safe because any task setting + * last_unlink_trans must take the log_mutex and it must do this before it does + * the actual unlink operation, so if we do this check before a concurrent task + * sets last_unlink_trans it means we've logged a consistent version/state of + * all the inode items, otherwise we are not sure and must do a transaction + * commit (the concurrent task might have only updated last_unlink_trans before + * we logged the inode or it might have also done the unlink). + */ +static bool btrfs_must_commit_transaction(struct btrfs_trans_handle *trans, + struct inode *inode) +{ + struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; + bool ret = false; + + mutex_lock(&BTRFS_I(inode)->log_mutex); + if (BTRFS_I(inode)->last_unlink_trans > fs_info->last_trans_committed) { + /* + * Make sure any commits to the log are forced to be full + * commits. + */ + btrfs_set_log_full_commit(fs_info, trans); + ret = true; + } + mutex_unlock(&BTRFS_I(inode)->log_mutex); + + return ret; +} + +/* * follow the dentry parent pointers up the chain and see if any * of the directories in it require a full commit before they can * be logged.
Returns zero if nothing special needs to be done or 1 if @@ -4921,7 +4973,6 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, u64 last_committed) { int ret = 0; - struct btrfs_root *root; struct dentry *old_parent = NULL; struct inode *orig_inode = inode; @@ -4953,14 +5004,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, BTRFS_I(inode)->logged_trans = trans->transid; smp_mb(); - if (BTRFS_I(inode)->last_unlink_trans > last_committed) { - root = BTRFS_I(inode)->root; - - /* - * make sure any commits to the log are forced - * to be full commits - */ - btrfs_set_log_full_commit(root->fs_info, trans); + if (btrfs_must_commit_transaction(trans, inode)) { ret = 1; break; } @@ -5119,6 +5163,9 @@ process_leaf: btrfs_release_path(path); ret = btrfs_log_inode(trans, root, di_inode, log_mode, 0, LLONG_MAX, ctx); + if (!ret && + btrfs_must_commit_transaction(trans, di_inode)) + ret = 1; iput(di_inode); if (ret) goto next_dir_inode; @@ -5233,6 +5280,9 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans, ret = btrfs_log_inode(trans, root, dir_inode, LOG_INODE_ALL, 0, LLONG_MAX, ctx); + if (!ret && + btrfs_must_commit_transaction(trans, dir_inode)) + ret = 1; iput(dir_inode); if (ret) goto out; @@ -5584,6 +5634,9 @@ error: * They revolve around files there were unlinked from the directory, and * this function updates the parent directory so that a full commit is * properly done if it is fsync'd later after the unlinks are done. + * + * Must be called before the unlink operations (updates to the subvolume tree, + * inodes, etc) are done. */ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans, struct inode *dir, struct inode *inode, @@ -5599,8 +5652,11 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans, * into the file. When the file is logged we check it and * don't log the parents if the file is fully on disk. 
*/ - if (S_ISREG(inode->i_mode)) + if (S_ISREG(inode->i_mode)) { + mutex_lock(&BTRFS_I(inode)->log_mutex); BTRFS_I(inode)->last_unlink_trans = trans->transid; + mutex_unlock(&BTRFS_I(inode)->log_mutex); + } /* * if this directory was already logged any new @@ -5631,7 +5687,29 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans, return; record: + mutex_lock(&BTRFS_I(dir)->log_mutex); + BTRFS_I(dir)->last_unlink_trans = trans->transid; + mutex_unlock(&BTRFS_I(dir)->log_mutex); +} + +/* + * Make sure that if someone attempts to fsync the parent directory of a deleted + * snapshot, it ends up triggering a transaction commit. This is to guarantee + * that after replaying the log tree of the parent directory's root we will not + * see the snapshot anymore and at log replay time we will not see any log tree + * corresponding to the deleted snapshot's root, which could lead to replaying + * it after replaying the log tree of the parent directory (which would replay + * the snapshot delete operation). + * + * Must be called before the actual snapshot destroy operation (updates to the + * parent root and tree of tree roots trees, etc) are done. 
+ */ +void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans, + struct inode *dir) +{ + mutex_lock(&BTRFS_I(dir)->log_mutex); BTRFS_I(dir)->last_unlink_trans = trans->transid; + mutex_unlock(&BTRFS_I(dir)->log_mutex); } /* diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index 6916a781e..a9f1b75d0 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h @@ -79,6 +79,8 @@ int btrfs_pin_log_trans(struct btrfs_root *root); void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans, struct inode *dir, struct inode *inode, int for_rename); +void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans, + struct inode *dir); int btrfs_log_new_name(struct btrfs_trans_handle *trans, struct inode *inode, struct inode *old_dir, struct dentry *parent); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 366b33594..bd0f45fb3 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -138,7 +138,7 @@ static struct btrfs_fs_devices *__alloc_fs_devices(void) { struct btrfs_fs_devices *fs_devs; - fs_devs = kzalloc(sizeof(*fs_devs), GFP_NOFS); + fs_devs = kzalloc(sizeof(*fs_devs), GFP_KERNEL); if (!fs_devs) return ERR_PTR(-ENOMEM); @@ -220,7 +220,7 @@ static struct btrfs_device *__alloc_device(void) { struct btrfs_device *dev; - dev = kzalloc(sizeof(*dev), GFP_NOFS); + dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) return ERR_PTR(-ENOMEM); @@ -733,7 +733,8 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) * uuid mutex so nothing we touch in here is going to disappear. 
*/ if (orig_dev->name) { - name = rcu_string_strdup(orig_dev->name->str, GFP_NOFS); + name = rcu_string_strdup(orig_dev->name->str, + GFP_KERNEL); if (!name) { kfree(device); goto error; @@ -1024,16 +1025,16 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, } /* make sure our super fits in the device */ - if (bytenr + PAGE_CACHE_SIZE >= i_size_read(bdev->bd_inode)) + if (bytenr + PAGE_SIZE >= i_size_read(bdev->bd_inode)) goto error_bdev_put; /* make sure our super fits in the page */ - if (sizeof(*disk_super) > PAGE_CACHE_SIZE) + if (sizeof(*disk_super) > PAGE_SIZE) goto error_bdev_put; /* make sure our super doesn't straddle pages on disk */ - index = bytenr >> PAGE_CACHE_SHIFT; - if ((bytenr + sizeof(*disk_super) - 1) >> PAGE_CACHE_SHIFT != index) + index = bytenr >> PAGE_SHIFT; + if ((bytenr + sizeof(*disk_super) - 1) >> PAGE_SHIFT != index) goto error_bdev_put; /* pull in the page with our super */ @@ -1046,7 +1047,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, p = kmap(page); /* align our pointer to the offset of the super block */ - disk_super = p + (bytenr & ~PAGE_CACHE_MASK); + disk_super = p + (bytenr & ~PAGE_MASK); if (btrfs_super_bytenr(disk_super) != bytenr || btrfs_super_magic(disk_super) != BTRFS_MAGIC) @@ -1074,7 +1075,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, error_unmap: kunmap(page); - page_cache_release(page); + put_page(page); error_bdev_put: blkdev_put(bdev, flags); @@ -1714,12 +1715,12 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) } while (read_seqretry(&root->fs_info->profiles_lock, seq)); num_devices = root->fs_info->fs_devices->num_devices; - btrfs_dev_replace_lock(&root->fs_info->dev_replace); + btrfs_dev_replace_lock(&root->fs_info->dev_replace, 0); if (btrfs_dev_replace_is_ongoing(&root->fs_info->dev_replace)) { WARN_ON(num_devices < 1); num_devices--; } - btrfs_dev_replace_unlock(&root->fs_info->dev_replace); + 
btrfs_dev_replace_unlock(&root->fs_info->dev_replace, 0); if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && num_devices <= 4) { ret = BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET; @@ -2287,7 +2288,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) goto error; } - name = rcu_string_strdup(device_path, GFP_NOFS); + name = rcu_string_strdup(device_path, GFP_KERNEL); if (!name) { kfree(device); ret = -ENOMEM; @@ -2748,7 +2749,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, em->start + em->len < chunk_offset) { /* * This is a logic error, but we don't want to just rely on the - * user having built with ASSERT enabled, so if ASSERT doens't + * user having built with ASSERT enabled, so if ASSERT doesn't * do anything we still error out. */ ASSERT(0); @@ -2966,7 +2967,7 @@ static int insert_balance_item(struct btrfs_root *root, } key.objectid = BTRFS_BALANCE_OBJECTID; - key.type = BTRFS_BALANCE_ITEM_KEY; + key.type = BTRFS_TEMPORARY_ITEM_KEY; key.offset = 0; ret = btrfs_insert_empty_item(trans, root, path, &key, @@ -3015,7 +3016,7 @@ static int del_balance_item(struct btrfs_root *root) } key.objectid = BTRFS_BALANCE_OBJECTID; - key.type = BTRFS_BALANCE_ITEM_KEY; + key.type = BTRFS_TEMPORARY_ITEM_KEY; key.offset = 0; ret = btrfs_search_slot(trans, root, &key, path, -1, 1); @@ -3686,12 +3687,12 @@ int btrfs_balance(struct btrfs_balance_control *bctl, } num_devices = fs_info->fs_devices->num_devices; - btrfs_dev_replace_lock(&fs_info->dev_replace); + btrfs_dev_replace_lock(&fs_info->dev_replace, 0); if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) { BUG_ON(num_devices < 1); num_devices--; } - btrfs_dev_replace_unlock(&fs_info->dev_replace); + btrfs_dev_replace_unlock(&fs_info->dev_replace, 0); allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE; if (num_devices == 1) allowed |= BTRFS_BLOCK_GROUP_DUP; @@ -3867,7 +3868,7 @@ int btrfs_recover_balance(struct btrfs_fs_info *fs_info) return -ENOMEM; key.objectid = BTRFS_BALANCE_OBJECTID; - key.type = 
BTRFS_BALANCE_ITEM_KEY; + key.type = BTRFS_TEMPORARY_ITEM_KEY; key.offset = 0; ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0); @@ -4118,7 +4119,7 @@ out: * Callback for btrfs_uuid_tree_iterate(). * returns: * 0 check succeeded, the entry is not outdated. - * < 0 if an error occured. + * < 0 if an error occurred. * > 0 if the check failed, which means the caller shall remove the entry. */ static int btrfs_check_uuid_tree_entry(struct btrfs_fs_info *fs_info, @@ -5062,10 +5063,10 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len) ret = 1; free_extent_map(em); - btrfs_dev_replace_lock(&fs_info->dev_replace); + btrfs_dev_replace_lock(&fs_info->dev_replace, 0); if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) ret++; - btrfs_dev_replace_unlock(&fs_info->dev_replace); + btrfs_dev_replace_unlock(&fs_info->dev_replace, 0); return ret; } @@ -5325,10 +5326,12 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, if (!bbio_ret) goto out; - btrfs_dev_replace_lock(dev_replace); + btrfs_dev_replace_lock(dev_replace, 0); dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace); if (!dev_replace_is_ongoing) - btrfs_dev_replace_unlock(dev_replace); + btrfs_dev_replace_unlock(dev_replace, 0); + else + btrfs_dev_replace_set_lock_blocking(dev_replace); if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 && !(rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)) && @@ -5751,8 +5754,10 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, bbio->mirror_num = map->num_stripes + 1; } out: - if (dev_replace_is_ongoing) - btrfs_dev_replace_unlock(dev_replace); + if (dev_replace_is_ongoing) { + btrfs_dev_replace_clear_lock_blocking(dev_replace); + btrfs_dev_replace_unlock(dev_replace, 0); + } free_extent_map(em); return ret; } @@ -6522,7 +6527,7 @@ int btrfs_read_sys_array(struct btrfs_root *root) * but sb spans only this function. 
Add an explicit SetPageUptodate call * to silence the warning eg. on PowerPC 64. */ - if (PAGE_CACHE_SIZE > BTRFS_SUPER_INFO_SIZE) + if (PAGE_SIZE > BTRFS_SUPER_INFO_SIZE) SetPageUptodate(sb->pages[0]); write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE); @@ -6705,8 +6710,8 @@ int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info) int item_size; struct btrfs_dev_stats_item *ptr; - key.objectid = 0; - key.type = BTRFS_DEV_STATS_KEY; + key.objectid = BTRFS_DEV_STATS_OBJECTID; + key.type = BTRFS_PERSISTENT_ITEM_KEY; key.offset = device->devid; ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0); if (ret) { @@ -6753,8 +6758,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans, int ret; int i; - key.objectid = 0; - key.type = BTRFS_DEV_STATS_KEY; + key.objectid = BTRFS_DEV_STATS_OBJECTID; + key.type = BTRFS_PERSISTENT_ITEM_KEY; key.offset = device->devid; path = btrfs_alloc_path(); diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 6c68d6356..145d2b89e 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -249,7 +249,7 @@ int __btrfs_setxattr(struct btrfs_trans_handle *trans, goto out; inode_inc_iversion(inode); - inode->i_ctime = CURRENT_TIME; + inode->i_ctime = current_fs_time(inode->i_sb); set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags); ret = btrfs_update_inode(trans, root, inode); BUG_ON(ret); @@ -260,16 +260,12 @@ out: ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) { - struct btrfs_key key, found_key; + struct btrfs_key key; struct inode *inode = d_inode(dentry); struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_path *path; - struct extent_buffer *leaf; - struct btrfs_dir_item *di; - int ret = 0, slot; + int ret = 0; size_t total_size = 0, size_left = size; - unsigned long name_ptr; - size_t name_len; /* * ok we want all objects associated with this id. 
@@ -291,6 +287,13 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) goto err; while (1) { + struct extent_buffer *leaf; + int slot; + struct btrfs_dir_item *di; + struct btrfs_key found_key; + u32 item_size; + u32 cur; + leaf = path->nodes[0]; slot = path->slots[0]; @@ -316,31 +319,45 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) if (found_key.type > BTRFS_XATTR_ITEM_KEY) break; if (found_key.type < BTRFS_XATTR_ITEM_KEY) - goto next; + goto next_item; di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); - if (verify_dir_item(root, leaf, di)) - goto next; - - name_len = btrfs_dir_name_len(leaf, di); - total_size += name_len + 1; + item_size = btrfs_item_size_nr(leaf, slot); + cur = 0; + while (cur < item_size) { + u16 name_len = btrfs_dir_name_len(leaf, di); + u16 data_len = btrfs_dir_data_len(leaf, di); + u32 this_len = sizeof(*di) + name_len + data_len; + unsigned long name_ptr = (unsigned long)(di + 1); + + if (verify_dir_item(root, leaf, di)) { + ret = -EIO; + goto err; + } - /* we are just looking for how big our buffer needs to be */ - if (!size) - goto next; + total_size += name_len + 1; + /* + * We are just looking for how big our buffer needs to + * be. 
+ */ + if (!size) + goto next; - if (!buffer || (name_len + 1) > size_left) { - ret = -ERANGE; - goto err; - } + if (!buffer || (name_len + 1) > size_left) { + ret = -ERANGE; + goto err; + } - name_ptr = (unsigned long)(di + 1); - read_extent_buffer(leaf, buffer, name_ptr, name_len); - buffer[name_len] = '\0'; + read_extent_buffer(leaf, buffer, name_ptr, name_len); + buffer[name_len] = '\0'; - size_left -= name_len + 1; - buffer += name_len + 1; + size_left -= name_len + 1; + buffer += name_len + 1; next: + cur += this_len; + di = (struct btrfs_dir_item *)((char *)di + this_len); + } +next_item: path->slots[0]++; } ret = total_size; diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index 82990b8f8..88d274e8e 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c @@ -59,7 +59,7 @@ static struct list_head *zlib_alloc_workspace(void) workspacesize = max(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL), zlib_inflate_workspacesize()); workspace->strm.workspace = vmalloc(workspacesize); - workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS); + workspace->buf = kmalloc(PAGE_SIZE, GFP_NOFS); if (!workspace->strm.workspace || !workspace->buf) goto fail; @@ -103,7 +103,7 @@ static int zlib_compress_pages(struct list_head *ws, workspace->strm.total_in = 0; workspace->strm.total_out = 0; - in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT); + in_page = find_get_page(mapping, start >> PAGE_SHIFT); data_in = kmap(in_page); out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); @@ -117,8 +117,8 @@ static int zlib_compress_pages(struct list_head *ws, workspace->strm.next_in = data_in; workspace->strm.next_out = cpage_out; - workspace->strm.avail_out = PAGE_CACHE_SIZE; - workspace->strm.avail_in = min(len, PAGE_CACHE_SIZE); + workspace->strm.avail_out = PAGE_SIZE; + workspace->strm.avail_in = min(len, PAGE_SIZE); while (workspace->strm.total_in < len) { ret = zlib_deflate(&workspace->strm, Z_SYNC_FLUSH); @@ -156,7 +156,7 @@ static int zlib_compress_pages(struct list_head *ws, 
cpage_out = kmap(out_page); pages[nr_pages] = out_page; nr_pages++; - workspace->strm.avail_out = PAGE_CACHE_SIZE; + workspace->strm.avail_out = PAGE_SIZE; workspace->strm.next_out = cpage_out; } /* we're all done */ @@ -170,14 +170,14 @@ static int zlib_compress_pages(struct list_head *ws, bytes_left = len - workspace->strm.total_in; kunmap(in_page); - page_cache_release(in_page); + put_page(in_page); - start += PAGE_CACHE_SIZE; + start += PAGE_SIZE; in_page = find_get_page(mapping, - start >> PAGE_CACHE_SHIFT); + start >> PAGE_SHIFT); data_in = kmap(in_page); workspace->strm.avail_in = min(bytes_left, - PAGE_CACHE_SIZE); + PAGE_SIZE); workspace->strm.next_in = data_in; } } @@ -205,7 +205,7 @@ out: if (in_page) { kunmap(in_page); - page_cache_release(in_page); + put_page(in_page); } return ret; } @@ -223,18 +223,18 @@ static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in, size_t total_out = 0; unsigned long page_in_index = 0; unsigned long page_out_index = 0; - unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_CACHE_SIZE); + unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_SIZE); unsigned long buf_start; unsigned long pg_offset; data_in = kmap(pages_in[page_in_index]); workspace->strm.next_in = data_in; - workspace->strm.avail_in = min_t(size_t, srclen, PAGE_CACHE_SIZE); + workspace->strm.avail_in = min_t(size_t, srclen, PAGE_SIZE); workspace->strm.total_in = 0; workspace->strm.total_out = 0; workspace->strm.next_out = workspace->buf; - workspace->strm.avail_out = PAGE_CACHE_SIZE; + workspace->strm.avail_out = PAGE_SIZE; pg_offset = 0; /* If it's deflate, and it's got no preset dictionary, then @@ -274,7 +274,7 @@ static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in, } workspace->strm.next_out = workspace->buf; - workspace->strm.avail_out = PAGE_CACHE_SIZE; + workspace->strm.avail_out = PAGE_SIZE; if (workspace->strm.avail_in == 0) { unsigned long tmp; @@ -288,7 +288,7 @@ static int 
zlib_decompress_biovec(struct list_head *ws, struct page **pages_in, workspace->strm.next_in = data_in; tmp = srclen - workspace->strm.total_in; workspace->strm.avail_in = min(tmp, - PAGE_CACHE_SIZE); + PAGE_SIZE); } } if (ret != Z_STREAM_END) @@ -325,7 +325,7 @@ static int zlib_decompress(struct list_head *ws, unsigned char *data_in, workspace->strm.total_in = 0; workspace->strm.next_out = workspace->buf; - workspace->strm.avail_out = PAGE_CACHE_SIZE; + workspace->strm.avail_out = PAGE_SIZE; workspace->strm.total_out = 0; /* If it's deflate, and it's got no preset dictionary, then we can tell zlib to skip the adler32 check. */ @@ -368,8 +368,8 @@ static int zlib_decompress(struct list_head *ws, unsigned char *data_in, else buf_offset = 0; - bytes = min(PAGE_CACHE_SIZE - pg_offset, - PAGE_CACHE_SIZE - buf_offset); + bytes = min(PAGE_SIZE - pg_offset, + PAGE_SIZE - buf_offset); bytes = min(bytes, bytes_left); kaddr = kmap_atomic(dest_page); @@ -380,7 +380,7 @@ static int zlib_decompress(struct list_head *ws, unsigned char *data_in, bytes_left -= bytes; next: workspace->strm.next_out = workspace->buf; - workspace->strm.avail_out = PAGE_CACHE_SIZE; + workspace->strm.avail_out = PAGE_SIZE; } if (ret != Z_STREAM_END && bytes_left != 0) |