author    André Fabian Silva Delgado <emulatorman@parabola.nu>   2016-10-20 00:10:27 -0300
committer André Fabian Silva Delgado <emulatorman@parabola.nu>   2016-10-20 00:10:27 -0300
commit    d0b2f91bede3bd5e3d24dd6803e56eee959c1797 (patch)
tree      7fee4ab0509879c373c4f2cbd5b8a5be5b4041ee   /fs/btrfs
parent    e914f8eb445e8f74b00303c19c2ffceaedd16a05 (diff)
Linux-libre 4.8.2-gnupck-4.8.2-gnu
Diffstat (limited to 'fs/btrfs')
-rw-r--r--  fs/btrfs/acl.c | 3
-rw-r--r--  fs/btrfs/async-thread.c | 31
-rw-r--r--  fs/btrfs/async-thread.h | 6
-rw-r--r--  fs/btrfs/backref.c | 5
-rw-r--r--  fs/btrfs/check-integrity.c | 61
-rw-r--r--  fs/btrfs/check-integrity.h | 6
-rw-r--r--  fs/btrfs/compression.c | 27
-rw-r--r--  fs/btrfs/ctree.c | 91
-rw-r--r--  fs/btrfs/ctree.h | 123
-rw-r--r--  fs/btrfs/dedupe.h | 24
-rw-r--r--  fs/btrfs/delayed-inode.c | 72
-rw-r--r--  fs/btrfs/delayed-ref.c | 47
-rw-r--r--  fs/btrfs/delayed-ref.h | 3
-rw-r--r--  fs/btrfs/dev-replace.c | 4
-rw-r--r--  fs/btrfs/disk-io.c | 193
-rw-r--r--  fs/btrfs/disk-io.h | 5
-rw-r--r--  fs/btrfs/extent-tree.c | 1056
-rw-r--r--  fs/btrfs/extent_io.c | 124
-rw-r--r--  fs/btrfs/extent_io.h | 9
-rw-r--r--  fs/btrfs/extent_map.c | 2
-rw-r--r--  fs/btrfs/file-item.c | 4
-rw-r--r--  fs/btrfs/file.c | 52
-rw-r--r--  fs/btrfs/free-space-cache.c | 8
-rw-r--r--  fs/btrfs/free-space-tree.c | 16
-rw-r--r--  fs/btrfs/inode-map.c | 19
-rw-r--r--  fs/btrfs/inode.c | 378
-rw-r--r--  fs/btrfs/ioctl.c | 40
-rw-r--r--  fs/btrfs/ordered-data.c | 2
-rw-r--r--  fs/btrfs/props.c | 6
-rw-r--r--  fs/btrfs/qgroup.c | 58
-rw-r--r--  fs/btrfs/qgroup.h | 34
-rw-r--r--  fs/btrfs/raid56.c | 17
-rw-r--r--  fs/btrfs/relocation.c | 193
-rw-r--r--  fs/btrfs/root-tree.c | 10
-rw-r--r--  fs/btrfs/scrub.c | 27
-rw-r--r--  fs/btrfs/send.c | 181
-rw-r--r--  fs/btrfs/super.c | 234
-rw-r--r--  fs/btrfs/sysfs.c | 2
-rw-r--r--  fs/btrfs/tests/btrfs-tests.c | 67
-rw-r--r--  fs/btrfs/tests/btrfs-tests.h | 36
-rw-r--r--  fs/btrfs/tests/extent-buffer-tests.c | 23
-rw-r--r--  fs/btrfs/tests/free-space-tests.c | 14
-rw-r--r--  fs/btrfs/tests/free-space-tree-tests.c | 18
-rw-r--r--  fs/btrfs/tests/inode-tests.c | 46
-rw-r--r--  fs/btrfs/tests/qgroup-tests.c | 23
-rw-r--r--  fs/btrfs/transaction.c | 44
-rw-r--r--  fs/btrfs/transaction.h | 1
-rw-r--r--  fs/btrfs/tree-log.c | 128
-rw-r--r--  fs/btrfs/tree-log.h | 5
-rw-r--r--  fs/btrfs/volumes.c | 252
-rw-r--r--  fs/btrfs/volumes.h | 6
51 files changed, 2391 insertions(+), 1445 deletions(-)
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 67a607709..53bb7af4e 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -55,8 +55,7 @@ struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
}
if (size > 0) {
acl = posix_acl_from_xattr(&init_user_ns, value, size);
- } else if (size == -ENOENT || size == -ENODATA || size == 0) {
- /* FIXME, who returns -ENOENT? I think nobody */
+ } else if (size == -ERANGE || size == -ENODATA || size == 0) {
acl = NULL;
} else {
acl = ERR_PTR(-EIO);
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 5fb60ea7e..e0f071f6b 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -34,6 +34,10 @@
struct __btrfs_workqueue {
struct workqueue_struct *normal_wq;
+
+ /* File system this workqueue services */
+ struct btrfs_fs_info *fs_info;
+
/* List head pointing to ordered work list */
struct list_head ordered_list;
@@ -70,6 +74,18 @@ void btrfs_##name(struct work_struct *arg) \
normal_work_helper(work); \
}
+struct btrfs_fs_info *
+btrfs_workqueue_owner(struct __btrfs_workqueue *wq)
+{
+ return wq->fs_info;
+}
+
+struct btrfs_fs_info *
+btrfs_work_owner(struct btrfs_work *work)
+{
+ return work->wq->fs_info;
+}
+
BTRFS_WORK_HELPER(worker_helper);
BTRFS_WORK_HELPER(delalloc_helper);
BTRFS_WORK_HELPER(flush_delalloc_helper);
@@ -94,14 +110,15 @@ BTRFS_WORK_HELPER(scrubnc_helper);
BTRFS_WORK_HELPER(scrubparity_helper);
static struct __btrfs_workqueue *
-__btrfs_alloc_workqueue(const char *name, unsigned int flags, int limit_active,
- int thresh)
+__btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info, const char *name,
+ unsigned int flags, int limit_active, int thresh)
{
struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_KERNEL);
if (!ret)
return NULL;
+ ret->fs_info = fs_info;
ret->limit_active = limit_active;
atomic_set(&ret->pending, 0);
if (thresh == 0)
@@ -143,7 +160,8 @@ __btrfs_alloc_workqueue(const char *name, unsigned int flags, int limit_active,
static inline void
__btrfs_destroy_workqueue(struct __btrfs_workqueue *wq);
-struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
+struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info,
+ const char *name,
unsigned int flags,
int limit_active,
int thresh)
@@ -153,7 +171,8 @@ struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
if (!ret)
return NULL;
- ret->normal = __btrfs_alloc_workqueue(name, flags & ~WQ_HIGHPRI,
+ ret->normal = __btrfs_alloc_workqueue(fs_info, name,
+ flags & ~WQ_HIGHPRI,
limit_active, thresh);
if (!ret->normal) {
kfree(ret);
@@ -161,8 +180,8 @@ struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
}
if (flags & WQ_HIGHPRI) {
- ret->high = __btrfs_alloc_workqueue(name, flags, limit_active,
- thresh);
+ ret->high = __btrfs_alloc_workqueue(fs_info, name, flags,
+ limit_active, thresh);
if (!ret->high) {
__btrfs_destroy_workqueue(ret->normal);
kfree(ret);
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index ad4d0647d..8e52484cd 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -21,6 +21,7 @@
#define __BTRFS_ASYNC_THREAD_
#include <linux/workqueue.h>
+struct btrfs_fs_info;
struct btrfs_workqueue;
/* Internal use only */
struct __btrfs_workqueue;
@@ -67,7 +68,8 @@ BTRFS_WORK_HELPER_PROTO(scrubnc_helper);
BTRFS_WORK_HELPER_PROTO(scrubparity_helper);
-struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
+struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info,
+ const char *name,
unsigned int flags,
int limit_active,
int thresh);
@@ -80,4 +82,6 @@ void btrfs_queue_work(struct btrfs_workqueue *wq,
void btrfs_destroy_workqueue(struct btrfs_workqueue *wq);
void btrfs_workqueue_set_max(struct btrfs_workqueue *wq, int max);
void btrfs_set_work_high_priority(struct btrfs_work *work);
+struct btrfs_fs_info *btrfs_work_owner(struct btrfs_work *work);
+struct btrfs_fs_info *btrfs_workqueue_owner(struct __btrfs_workqueue *wq);
#endif
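
Note: btrfs_work_owner() and btrfs_workqueue_owner() give completion and
error paths a way back from a queued work item to the filesystem that owns
it. A minimal sketch of the resulting pattern (hypothetical handler, not
part of this patch):

    /*
     * Allocation now pins the owning filesystem, e.g.
     *     wq = btrfs_alloc_workqueue(fs_info, "example", flags, max_active, 0);
     * and a handler can later recover it from the work item:
     */
    static void example_work_fn(struct btrfs_work *work)
    {
            struct btrfs_fs_info *fs_info = btrfs_work_owner(work);

            /* tag messages and errors with the right filesystem */
            btrfs_err(fs_info, "example work item failed");
    }
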
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 8bb350909..455a6b2fd 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -139,7 +139,7 @@ int __init btrfs_prelim_ref_init(void)
btrfs_prelim_ref_cache = kmem_cache_create("btrfs_prelim_ref",
sizeof(struct __prelim_ref),
0,
- SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
+ SLAB_MEM_SPREAD,
NULL);
if (!btrfs_prelim_ref_cache)
return -ENOMEM;
@@ -361,7 +361,7 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
goto out;
}
- if (btrfs_test_is_dummy_root(root)) {
+ if (btrfs_is_testing(fs_info)) {
srcu_read_unlock(&fs_info->subvol_srcu, index);
ret = -ENOENT;
goto out;
@@ -589,6 +589,7 @@ static void __merge_refs(struct list_head *head, int mode)
list_del(&ref2->list);
kmem_cache_free(btrfs_prelim_ref_cache, ref2);
+ cond_resched();
}
}
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index 7706c8dc5..66789471b 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -1673,6 +1673,7 @@ static int btrfsic_read_block(struct btrfsic_state *state,
}
bio->bi_bdev = block_ctx->dev->bdev;
bio->bi_iter.bi_sector = dev_bytenr >> 9;
+ bio_set_op_attrs(bio, REQ_OP_READ, 0);
for (j = i; j < num_pages; j++) {
ret = bio_add_page(bio, block_ctx->pagev[j],
@@ -1685,7 +1686,7 @@ static int btrfsic_read_block(struct btrfsic_state *state,
"btrfsic: error, failed to add a single page!\n");
return -1;
}
- if (submit_bio_wait(READ, bio)) {
+ if (submit_bio_wait(bio)) {
printk(KERN_INFO
"btrfsic: read error at logical %llu dev %s!\n",
block_ctx->start, block_ctx->dev->name);
@@ -2206,7 +2207,7 @@ static void btrfsic_bio_end_io(struct bio *bp)
block->dev_bytenr, block->mirror_num);
next_block = block->next_in_same_bio;
block->iodone_w_error = iodone_w_error;
- if (block->submit_bio_bh_rw & REQ_FLUSH) {
+ if (block->submit_bio_bh_rw & REQ_PREFLUSH) {
dev_state->last_flush_gen++;
if ((dev_state->state->print_mask &
BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
@@ -2242,7 +2243,7 @@ static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate)
block->dev_bytenr, block->mirror_num);
block->iodone_w_error = iodone_w_error;
- if (block->submit_bio_bh_rw & REQ_FLUSH) {
+ if (block->submit_bio_bh_rw & REQ_PREFLUSH) {
dev_state->last_flush_gen++;
if ((dev_state->state->print_mask &
BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
@@ -2855,12 +2856,12 @@ static struct btrfsic_dev_state *btrfsic_dev_state_lookup(
return ds;
}
-int btrfsic_submit_bh(int rw, struct buffer_head *bh)
+int btrfsic_submit_bh(int op, int op_flags, struct buffer_head *bh)
{
struct btrfsic_dev_state *dev_state;
if (!btrfsic_is_initialized)
- return submit_bh(rw, bh);
+ return submit_bh(op, op_flags, bh);
mutex_lock(&btrfsic_mutex);
/* since btrfsic_submit_bh() might also be called before
@@ -2869,26 +2870,26 @@ int btrfsic_submit_bh(int rw, struct buffer_head *bh)
/* Only called to write the superblock (incl. FLUSH/FUA) */
if (NULL != dev_state &&
- (rw & WRITE) && bh->b_size > 0) {
+ (op == REQ_OP_WRITE) && bh->b_size > 0) {
u64 dev_bytenr;
dev_bytenr = 4096 * bh->b_blocknr;
if (dev_state->state->print_mask &
BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
printk(KERN_INFO
- "submit_bh(rw=0x%x, blocknr=%llu (bytenr %llu),"
- " size=%zu, data=%p, bdev=%p)\n",
- rw, (unsigned long long)bh->b_blocknr,
+ "submit_bh(op=0x%x,0x%x, blocknr=%llu "
+ "(bytenr %llu), size=%zu, data=%p, bdev=%p)\n",
+ op, op_flags, (unsigned long long)bh->b_blocknr,
dev_bytenr, bh->b_size, bh->b_data, bh->b_bdev);
btrfsic_process_written_block(dev_state, dev_bytenr,
&bh->b_data, 1, NULL,
- NULL, bh, rw);
- } else if (NULL != dev_state && (rw & REQ_FLUSH)) {
+ NULL, bh, op_flags);
+ } else if (NULL != dev_state && (op_flags & REQ_PREFLUSH)) {
if (dev_state->state->print_mask &
BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
printk(KERN_INFO
- "submit_bh(rw=0x%x FLUSH, bdev=%p)\n",
- rw, bh->b_bdev);
+ "submit_bh(op=0x%x,0x%x FLUSH, bdev=%p)\n",
+ op, op_flags, bh->b_bdev);
if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
if ((dev_state->state->print_mask &
(BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
@@ -2906,7 +2907,7 @@ int btrfsic_submit_bh(int rw, struct buffer_head *bh)
block->never_written = 0;
block->iodone_w_error = 0;
block->flush_gen = dev_state->last_flush_gen + 1;
- block->submit_bio_bh_rw = rw;
+ block->submit_bio_bh_rw = op_flags;
block->orig_bio_bh_private = bh->b_private;
block->orig_bio_bh_end_io.bh = bh->b_end_io;
block->next_in_same_bio = NULL;
@@ -2915,10 +2916,10 @@ int btrfsic_submit_bh(int rw, struct buffer_head *bh)
}
}
mutex_unlock(&btrfsic_mutex);
- return submit_bh(rw, bh);
+ return submit_bh(op, op_flags, bh);
}
-static void __btrfsic_submit_bio(int rw, struct bio *bio)
+static void __btrfsic_submit_bio(struct bio *bio)
{
struct btrfsic_dev_state *dev_state;
@@ -2930,7 +2931,7 @@ static void __btrfsic_submit_bio(int rw, struct bio *bio)
* btrfsic_mount(), this might return NULL */
dev_state = btrfsic_dev_state_lookup(bio->bi_bdev);
if (NULL != dev_state &&
- (rw & WRITE) && NULL != bio->bi_io_vec) {
+ (bio_op(bio) == REQ_OP_WRITE) && NULL != bio->bi_io_vec) {
unsigned int i;
u64 dev_bytenr;
u64 cur_bytenr;
@@ -2942,9 +2943,9 @@ static void __btrfsic_submit_bio(int rw, struct bio *bio)
if (dev_state->state->print_mask &
BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
printk(KERN_INFO
- "submit_bio(rw=0x%x, bi_vcnt=%u,"
+ "submit_bio(rw=%d,0x%x, bi_vcnt=%u,"
" bi_sector=%llu (bytenr %llu), bi_bdev=%p)\n",
- rw, bio->bi_vcnt,
+ bio_op(bio), bio->bi_opf, bio->bi_vcnt,
(unsigned long long)bio->bi_iter.bi_sector,
dev_bytenr, bio->bi_bdev);
@@ -2975,18 +2976,18 @@ static void __btrfsic_submit_bio(int rw, struct bio *bio)
btrfsic_process_written_block(dev_state, dev_bytenr,
mapped_datav, bio->bi_vcnt,
bio, &bio_is_patched,
- NULL, rw);
+ NULL, bio->bi_opf);
while (i > 0) {
i--;
kunmap(bio->bi_io_vec[i].bv_page);
}
kfree(mapped_datav);
- } else if (NULL != dev_state && (rw & REQ_FLUSH)) {
+ } else if (NULL != dev_state && (bio->bi_opf & REQ_PREFLUSH)) {
if (dev_state->state->print_mask &
BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
printk(KERN_INFO
- "submit_bio(rw=0x%x FLUSH, bdev=%p)\n",
- rw, bio->bi_bdev);
+ "submit_bio(rw=%d,0x%x FLUSH, bdev=%p)\n",
+ bio_op(bio), bio->bi_opf, bio->bi_bdev);
if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
if ((dev_state->state->print_mask &
(BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
@@ -3004,7 +3005,7 @@ static void __btrfsic_submit_bio(int rw, struct bio *bio)
block->never_written = 0;
block->iodone_w_error = 0;
block->flush_gen = dev_state->last_flush_gen + 1;
- block->submit_bio_bh_rw = rw;
+ block->submit_bio_bh_rw = bio->bi_opf;
block->orig_bio_bh_private = bio->bi_private;
block->orig_bio_bh_end_io.bio = bio->bi_end_io;
block->next_in_same_bio = NULL;
@@ -3016,16 +3017,16 @@ leave:
mutex_unlock(&btrfsic_mutex);
}
-void btrfsic_submit_bio(int rw, struct bio *bio)
+void btrfsic_submit_bio(struct bio *bio)
{
- __btrfsic_submit_bio(rw, bio);
- submit_bio(rw, bio);
+ __btrfsic_submit_bio(bio);
+ submit_bio(bio);
}
-int btrfsic_submit_bio_wait(int rw, struct bio *bio)
+int btrfsic_submit_bio_wait(struct bio *bio)
{
- __btrfsic_submit_bio(rw, bio);
- return submit_bio_wait(rw, bio);
+ __btrfsic_submit_bio(bio);
+ return submit_bio_wait(bio);
}
int btrfsic_mount(struct btrfs_root *root,
diff --git a/fs/btrfs/check-integrity.h b/fs/btrfs/check-integrity.h
index 13b8566c9..f78dff1c7 100644
--- a/fs/btrfs/check-integrity.h
+++ b/fs/btrfs/check-integrity.h
@@ -20,9 +20,9 @@
#define __BTRFS_CHECK_INTEGRITY__
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
-int btrfsic_submit_bh(int rw, struct buffer_head *bh);
-void btrfsic_submit_bio(int rw, struct bio *bio);
-int btrfsic_submit_bio_wait(int rw, struct bio *bio);
+int btrfsic_submit_bh(int op, int op_flags, struct buffer_head *bh);
+void btrfsic_submit_bio(struct bio *bio);
+int btrfsic_submit_bio_wait(struct bio *bio);
#else
#define btrfsic_submit_bh submit_bh
#define btrfsic_submit_bio submit_bio
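
Note: this follows the 4.8 block-layer change that moved the operation from
a submit-path argument into the bio itself (bio->bi_opf). A rough
before/after sketch of a read, not taken from the patch:

    /* before: the operation rode along as a function argument */
    submit_bio_wait(READ, bio);

    /* after: op and flags are set once on the bio; submit takes no rw */
    bio_set_op_attrs(bio, REQ_OP_READ, 0);
    submit_bio_wait(bio);
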
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 658c39b70..029db6e11 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -363,6 +363,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
kfree(cb);
return -ENOMEM;
}
+ bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
bio->bi_private = cb;
bio->bi_end_io = end_compressed_bio_write;
atomic_inc(&cb->pending_bios);
@@ -373,7 +374,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
page = compressed_pages[pg_index];
page->mapping = inode->i_mapping;
if (bio->bi_iter.bi_size)
- ret = io_tree->ops->merge_bio_hook(WRITE, page, 0,
+ ret = io_tree->ops->merge_bio_hook(page, 0,
PAGE_SIZE,
bio, 0);
else
@@ -401,13 +402,17 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
BUG_ON(ret); /* -ENOMEM */
}
- ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
- BUG_ON(ret); /* -ENOMEM */
+ ret = btrfs_map_bio(root, bio, 0, 1);
+ if (ret) {
+ bio->bi_error = ret;
+ bio_endio(bio);
+ }
bio_put(bio);
bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS);
BUG_ON(!bio);
+ bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
bio->bi_private = cb;
bio->bi_end_io = end_compressed_bio_write;
bio_add_page(bio, page, PAGE_SIZE, 0);
@@ -431,8 +436,11 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
BUG_ON(ret); /* -ENOMEM */
}
- ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
- BUG_ON(ret); /* -ENOMEM */
+ ret = btrfs_map_bio(root, bio, 0, 1);
+ if (ret) {
+ bio->bi_error = ret;
+ bio_endio(bio);
+ }
bio_put(bio);
return 0;
@@ -646,6 +654,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
comp_bio = compressed_bio_alloc(bdev, cur_disk_byte, GFP_NOFS);
if (!comp_bio)
goto fail2;
+ bio_set_op_attrs (comp_bio, REQ_OP_READ, 0);
comp_bio->bi_private = cb;
comp_bio->bi_end_io = end_compressed_bio_read;
atomic_inc(&cb->pending_bios);
@@ -656,7 +665,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
page->index = em_start >> PAGE_SHIFT;
if (comp_bio->bi_iter.bi_size)
- ret = tree->ops->merge_bio_hook(READ, page, 0,
+ ret = tree->ops->merge_bio_hook(page, 0,
PAGE_SIZE,
comp_bio, 0);
else
@@ -687,8 +696,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
sums += DIV_ROUND_UP(comp_bio->bi_iter.bi_size,
root->sectorsize);
- ret = btrfs_map_bio(root, READ, comp_bio,
- mirror_num, 0);
+ ret = btrfs_map_bio(root, comp_bio, mirror_num, 0);
if (ret) {
bio->bi_error = ret;
bio_endio(comp_bio);
@@ -699,6 +707,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
comp_bio = compressed_bio_alloc(bdev, cur_disk_byte,
GFP_NOFS);
BUG_ON(!comp_bio);
+ bio_set_op_attrs(comp_bio, REQ_OP_READ, 0);
comp_bio->bi_private = cb;
comp_bio->bi_end_io = end_compressed_bio_read;
@@ -717,7 +726,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
BUG_ON(ret); /* -ENOMEM */
}
- ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0);
+ ret = btrfs_map_bio(root, comp_bio, mirror_num, 0);
if (ret) {
bio->bi_error = ret;
bio_endio(comp_bio);
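
Note: besides the bio-op conversion, the compressed-write path above drops
two BUG_ON(ret) calls in favor of failing the bio, so a btrfs_map_bio()
error now unwinds through the normal completion path instead of crashing
the kernel. The pattern, as used in the hunks above:

    ret = btrfs_map_bio(root, bio, 0, 1);
    if (ret) {
            bio->bi_error = ret;    /* record the failure on the bio */
            bio_endio(bio);         /* complete it so waiters observe it */
    }
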
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index a85cf7d23..d1c56c94d 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1153,14 +1153,14 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
ret = update_ref_for_cow(trans, root, buf, cow, &last_ref);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
return ret;
}
if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) {
ret = btrfs_reloc_cow_block(trans, root, buf, cow);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
return ret;
}
}
@@ -1198,7 +1198,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
if (last_ref) {
ret = tree_mod_log_free_eb(root->fs_info, buf);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
return ret;
}
}
@@ -1505,7 +1505,7 @@ static inline int should_cow_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *buf)
{
- if (btrfs_test_is_dummy_root(root))
+ if (btrfs_is_testing(root->fs_info))
return 0;
/* ensure we can see the force_cow */
@@ -1771,6 +1771,14 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
unsigned long map_len = 0;
int err;
+ if (low > high) {
+ btrfs_err(eb->fs_info,
+ "%s: low (%d) > high (%d) eb %llu owner %llu level %d",
+ __func__, low, high, eb->start,
+ btrfs_header_owner(eb), btrfs_header_level(eb));
+ return -EINVAL;
+ }
+
while (low < high) {
mid = (low + high) / 2;
offset = p + mid * item_size;
@@ -1858,7 +1866,6 @@ static void root_sub_used(struct btrfs_root *root, u32 size)
/* given a node and slot number, this reads the blocks it points to. The
* extent buffer is returned with a reference taken (but unlocked).
- * NULL is returned on error.
*/
static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root,
struct extent_buffer *parent, int slot)
@@ -1866,19 +1873,16 @@ static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root,
int level = btrfs_header_level(parent);
struct extent_buffer *eb;
- if (slot < 0)
- return NULL;
- if (slot >= btrfs_header_nritems(parent))
- return NULL;
+ if (slot < 0 || slot >= btrfs_header_nritems(parent))
+ return ERR_PTR(-ENOENT);
BUG_ON(level == 0);
eb = read_tree_block(root, btrfs_node_blockptr(parent, slot),
btrfs_node_ptr_generation(parent, slot));
- if (IS_ERR(eb) || !extent_buffer_uptodate(eb)) {
- if (!IS_ERR(eb))
- free_extent_buffer(eb);
- eb = NULL;
+ if (!IS_ERR(eb) && !extent_buffer_uptodate(eb)) {
+ free_extent_buffer(eb);
+ eb = ERR_PTR(-EIO);
}
return eb;
@@ -1931,8 +1935,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
/* promote the child to a root */
child = read_node_slot(root, mid, 0);
- if (!child) {
- ret = -EROFS;
+ if (IS_ERR(child)) {
+ ret = PTR_ERR(child);
btrfs_handle_fs_error(root->fs_info, ret, NULL);
goto enospc;
}
@@ -1970,6 +1974,9 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
return 0;
left = read_node_slot(root, parent, pslot - 1);
+ if (IS_ERR(left))
+ left = NULL;
+
if (left) {
btrfs_tree_lock(left);
btrfs_set_lock_blocking(left);
@@ -1980,7 +1987,11 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
goto enospc;
}
}
+
right = read_node_slot(root, parent, pslot + 1);
+ if (IS_ERR(right))
+ right = NULL;
+
if (right) {
btrfs_tree_lock(right);
btrfs_set_lock_blocking(right);
@@ -2135,6 +2146,8 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
return 1;
left = read_node_slot(root, parent, pslot - 1);
+ if (IS_ERR(left))
+ left = NULL;
/* first, try to make some room in the middle buffer */
if (left) {
@@ -2185,6 +2198,8 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
free_extent_buffer(left);
}
right = read_node_slot(root, parent, pslot + 1);
+ if (IS_ERR(right))
+ right = NULL;
/*
* then try to empty the right most buffer into the middle
@@ -3240,7 +3255,7 @@ static int push_node_left(struct btrfs_trans_handle *trans,
ret = tree_mod_log_eb_copy(root->fs_info, dst, src, dst_nritems, 0,
push_items);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
return ret;
}
copy_extent_buffer(dst, src,
@@ -3315,7 +3330,7 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
ret = tree_mod_log_eb_copy(root->fs_info, dst, src, 0,
src_nritems - push_items, push_items);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
return ret;
}
copy_extent_buffer(dst, src,
@@ -3519,7 +3534,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
ret = tree_mod_log_eb_copy(root->fs_info, split, c, 0,
mid, c_nritems - mid);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
return ret;
}
copy_extent_buffer(split, c,
@@ -3773,7 +3788,11 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
btrfs_assert_tree_locked(path->nodes[1]);
right = read_node_slot(root, upper, slot + 1);
- if (right == NULL)
+ /*
+ * slot + 1 is not valid or we fail to read the right node,
+ * no big deal, just return.
+ */
+ if (IS_ERR(right))
return 1;
btrfs_tree_lock(right);
@@ -4003,7 +4022,11 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
btrfs_assert_tree_locked(path->nodes[1]);
left = read_node_slot(root, path->nodes[1], slot - 1);
- if (left == NULL)
+ /*
+ * slot - 1 is not valid or we fail to read the left node,
+ * no big deal, just return.
+ */
+ if (IS_ERR(left))
return 1;
btrfs_tree_lock(left);
@@ -5210,7 +5233,10 @@ find_next_key:
}
btrfs_set_path_blocking(path);
cur = read_node_slot(root, cur, slot);
- BUG_ON(!cur); /* -ENOMEM */
+ if (IS_ERR(cur)) {
+ ret = PTR_ERR(cur);
+ goto out;
+ }
btrfs_tree_read_lock(cur);
@@ -5229,15 +5255,21 @@ out:
return ret;
}
-static void tree_move_down(struct btrfs_root *root,
+static int tree_move_down(struct btrfs_root *root,
struct btrfs_path *path,
int *level, int root_level)
{
+ struct extent_buffer *eb;
+
BUG_ON(*level == 0);
- path->nodes[*level - 1] = read_node_slot(root, path->nodes[*level],
- path->slots[*level]);
+ eb = read_node_slot(root, path->nodes[*level], path->slots[*level]);
+ if (IS_ERR(eb))
+ return PTR_ERR(eb);
+
+ path->nodes[*level - 1] = eb;
path->slots[*level - 1] = 0;
(*level)--;
+ return 0;
}
static int tree_move_next_or_upnext(struct btrfs_root *root,
@@ -5282,8 +5314,7 @@ static int tree_advance(struct btrfs_root *root,
if (*level == 0 || !allow_down) {
ret = tree_move_next_or_upnext(root, path, level, root_level);
} else {
- tree_move_down(root, path, level, root_level);
- ret = 0;
+ ret = tree_move_down(root, path, level, root_level);
}
if (ret >= 0) {
if (*level == 0)
@@ -5457,8 +5488,10 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
left_root_level,
advance_left != ADVANCE_ONLY_NEXT,
&left_key);
- if (ret < 0)
+ if (ret == -1)
left_end_reached = ADVANCE;
+ else if (ret < 0)
+ goto out;
advance_left = 0;
}
if (advance_right && !right_end_reached) {
@@ -5466,8 +5499,10 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
right_root_level,
advance_right != ADVANCE_ONLY_NEXT,
&right_key);
- if (ret < 0)
+ if (ret == -1)
right_end_reached = ADVANCE;
+ else if (ret < 0)
+ goto out;
advance_right = 0;
}
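
Note: read_node_slot() now returns ERR_PTR(-ENOENT) for an invalid slot and
ERR_PTR(-EIO) for a failed or non-uptodate read instead of NULL, and the
hunks above convert every caller; btrfs_compare_trees() additionally
distinguishes "end of tree" (ret == -1) from a real error. Sketch of the
new caller pattern:

    struct extent_buffer *eb;

    eb = read_node_slot(root, parent, slot);
    if (IS_ERR(eb))                 /* was: if (!eb) */
            return PTR_ERR(eb);     /* callers that tolerate a missing
                                       neighbour instead set eb = NULL */
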
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 72f50480e..33fe03551 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -117,6 +117,7 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes)
#define BTRFS_FS_STATE_REMOUNTING 1
#define BTRFS_FS_STATE_TRANS_ABORTED 2
#define BTRFS_FS_STATE_DEV_REPLACING 3
+#define BTRFS_FS_STATE_DUMMY_FS_INFO 4
#define BTRFS_BACKREF_REV_MAX 256
#define BTRFS_BACKREF_REV_SHIFT 56
@@ -144,21 +145,6 @@ struct btrfs_header {
u8 level;
} __attribute__ ((__packed__));
-#define BTRFS_NODEPTRS_PER_BLOCK(r) (((r)->nodesize - \
- sizeof(struct btrfs_header)) / \
- sizeof(struct btrfs_key_ptr))
-#define __BTRFS_LEAF_DATA_SIZE(bs) ((bs) - sizeof(struct btrfs_header))
-#define BTRFS_LEAF_DATA_SIZE(r) (__BTRFS_LEAF_DATA_SIZE(r->nodesize))
-#define BTRFS_FILE_EXTENT_INLINE_DATA_START \
- (offsetof(struct btrfs_file_extent_item, disk_bytenr))
-#define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \
- sizeof(struct btrfs_item) - \
- BTRFS_FILE_EXTENT_INLINE_DATA_START)
-#define BTRFS_MAX_XATTR_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \
- sizeof(struct btrfs_item) -\
- sizeof(struct btrfs_dir_item))
-
-
/*
* this is a very generous portion of the super block, giving us
* room to translate 14 chunks with 3 stripes each.
@@ -439,6 +425,9 @@ struct btrfs_space_info {
struct list_head list;
/* Protected by the spinlock 'lock'. */
struct list_head ro_bgs;
+ struct list_head priority_tickets;
+ struct list_head tickets;
+ u64 tickets_id;
struct rw_semaphore groups_sem;
/* for block groups in our same type */
@@ -1092,6 +1081,8 @@ struct btrfs_fs_info {
struct list_head pinned_chunks;
int creating_free_space_tree;
+ /* Used to record internally whether fs has been frozen */
+ int fs_frozen;
};
struct btrfs_subvolume_writers {
@@ -1113,12 +1104,11 @@ struct btrfs_subvolume_writers {
#define BTRFS_ROOT_REF_COWS 1
#define BTRFS_ROOT_TRACK_DIRTY 2
#define BTRFS_ROOT_IN_RADIX 3
-#define BTRFS_ROOT_DUMMY_ROOT 4
-#define BTRFS_ROOT_ORPHAN_ITEM_INSERTED 5
-#define BTRFS_ROOT_DEFRAG_RUNNING 6
-#define BTRFS_ROOT_FORCE_COW 7
-#define BTRFS_ROOT_MULTI_LOG_TASKS 8
-#define BTRFS_ROOT_DIRTY 9
+#define BTRFS_ROOT_ORPHAN_ITEM_INSERTED 4
+#define BTRFS_ROOT_DEFRAG_RUNNING 5
+#define BTRFS_ROOT_FORCE_COW 6
+#define BTRFS_ROOT_MULTI_LOG_TASKS 7
+#define BTRFS_ROOT_DIRTY 8
/*
* in ram representation of the tree. extent_root is used for all allocations
@@ -1180,8 +1170,10 @@ struct btrfs_root {
u64 highest_objectid;
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
/* only used with CONFIG_BTRFS_FS_RUN_SANITY_TESTS is enabled */
u64 alloc_bytenr;
+#endif
u64 defrag_trans_start;
struct btrfs_key defrag_progress;
@@ -1258,6 +1250,39 @@ struct btrfs_root {
atomic_t qgroup_meta_rsv;
};
+static inline u32 __BTRFS_LEAF_DATA_SIZE(u32 blocksize)
+{
+ return blocksize - sizeof(struct btrfs_header);
+}
+
+static inline u32 BTRFS_LEAF_DATA_SIZE(const struct btrfs_root *root)
+{
+ return __BTRFS_LEAF_DATA_SIZE(root->nodesize);
+}
+
+static inline u32 BTRFS_MAX_ITEM_SIZE(const struct btrfs_root *root)
+{
+ return BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item);
+}
+
+static inline u32 BTRFS_NODEPTRS_PER_BLOCK(const struct btrfs_root *root)
+{
+ return BTRFS_LEAF_DATA_SIZE(root) / sizeof(struct btrfs_key_ptr);
+}
+
+#define BTRFS_FILE_EXTENT_INLINE_DATA_START \
+ (offsetof(struct btrfs_file_extent_item, disk_bytenr))
+static inline u32 BTRFS_MAX_INLINE_DATA_SIZE(const struct btrfs_root *root)
+{
+ return BTRFS_MAX_ITEM_SIZE(root) -
+ BTRFS_FILE_EXTENT_INLINE_DATA_START;
+}
+
+static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_root *root)
+{
+ return BTRFS_MAX_ITEM_SIZE(root) - sizeof(struct btrfs_dir_item);
+}
+
/*
* Flags for mount options.
*
@@ -1298,21 +1323,21 @@ struct btrfs_root {
#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
#define btrfs_raw_test_opt(o, opt) ((o) & BTRFS_MOUNT_##opt)
-#define btrfs_test_opt(root, opt) ((root)->fs_info->mount_opt & \
+#define btrfs_test_opt(fs_info, opt) ((fs_info)->mount_opt & \
BTRFS_MOUNT_##opt)
-#define btrfs_set_and_info(root, opt, fmt, args...) \
+#define btrfs_set_and_info(fs_info, opt, fmt, args...) \
{ \
- if (!btrfs_test_opt(root, opt)) \
- btrfs_info(root->fs_info, fmt, ##args); \
- btrfs_set_opt(root->fs_info->mount_opt, opt); \
+ if (!btrfs_test_opt(fs_info, opt)) \
+ btrfs_info(fs_info, fmt, ##args); \
+ btrfs_set_opt(fs_info->mount_opt, opt); \
}
-#define btrfs_clear_and_info(root, opt, fmt, args...) \
+#define btrfs_clear_and_info(fs_info, opt, fmt, args...) \
{ \
- if (btrfs_test_opt(root, opt)) \
- btrfs_info(root->fs_info, fmt, ##args); \
- btrfs_clear_opt(root->fs_info->mount_opt, opt); \
+ if (btrfs_test_opt(fs_info, opt)) \
+ btrfs_info(fs_info, fmt, ##args); \
+ btrfs_clear_opt(fs_info->mount_opt, opt); \
}
#ifdef CONFIG_BTRFS_DEBUG
@@ -1320,9 +1345,9 @@ static inline int
btrfs_should_fragment_free_space(struct btrfs_root *root,
struct btrfs_block_group_cache *block_group)
{
- return (btrfs_test_opt(root, FRAGMENT_METADATA) &&
+ return (btrfs_test_opt(root->fs_info, FRAGMENT_METADATA) &&
block_group->flags & BTRFS_BLOCK_GROUP_METADATA) ||
- (btrfs_test_opt(root, FRAGMENT_DATA) &&
+ (btrfs_test_opt(root->fs_info, FRAGMENT_DATA) &&
block_group->flags & BTRFS_BLOCK_GROUP_DATA);
}
#endif
@@ -2557,7 +2582,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 root_objectid, u64 owner, u64 offset,
struct btrfs_key *ins);
-int btrfs_reserve_extent(struct btrfs_root *root, u64 num_bytes,
+int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes, u64 num_bytes,
u64 min_alloc_size, u64 empty_size, u64 hint_byte,
struct btrfs_key *ins, int is_data, int delalloc);
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
@@ -2625,6 +2650,15 @@ enum btrfs_reserve_flush_enum {
BTRFS_RESERVE_FLUSH_ALL,
};
+enum btrfs_flush_state {
+ FLUSH_DELAYED_ITEMS_NR = 1,
+ FLUSH_DELAYED_ITEMS = 2,
+ FLUSH_DELALLOC = 3,
+ FLUSH_DELALLOC_WAIT = 4,
+ ALLOC_CHUNK = 5,
+ COMMIT_TRANS = 6,
+};
+
int btrfs_check_data_free_space(struct inode *inode, u64 start, u64 len);
int btrfs_alloc_data_chunk_ondemand(struct inode *inode, u64 bytes);
void btrfs_free_reserved_data_space(struct inode *inode, u64 start, u64 len);
@@ -2662,8 +2696,8 @@ int btrfs_block_rsv_refill(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv, u64 min_reserved,
enum btrfs_reserve_flush_enum flush);
int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
- struct btrfs_block_rsv *dst_rsv,
- u64 num_bytes);
+ struct btrfs_block_rsv *dst_rsv, u64 num_bytes,
+ int update_size);
int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *dest, u64 num_bytes,
int min_factor);
@@ -2876,9 +2910,6 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq);
/* root-item.c */
-int btrfs_find_root_ref(struct btrfs_root *tree_root,
- struct btrfs_path *path,
- u64 root_id, u64 ref_id);
int btrfs_add_root_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *tree_root,
u64 root_id, u64 ref_id, u64 dirid, u64 sequence,
@@ -3092,7 +3123,7 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
struct btrfs_root *new_root,
struct btrfs_root *parent_root,
u64 new_dirid);
-int btrfs_merge_bio_hook(int rw, struct page *page, unsigned long offset,
+int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
size_t size, struct bio *bio,
unsigned long bio_flags);
int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
@@ -3352,23 +3383,23 @@ const char *btrfs_decode_error(int errno);
__cold
void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, const char *function,
+ const char *function,
unsigned int line, int errno);
/*
* Call btrfs_abort_transaction as early as possible when an error condition is
* detected, that way the exact line number is reported.
*/
-#define btrfs_abort_transaction(trans, root, errno) \
+#define btrfs_abort_transaction(trans, errno) \
do { \
/* Report first abort since mount */ \
if (!test_and_set_bit(BTRFS_FS_STATE_TRANS_ABORTED, \
- &((root)->fs_info->fs_state))) { \
+ &((trans)->fs_info->fs_state))) { \
WARN(1, KERN_DEBUG \
"BTRFS: Transaction aborted (error %d)\n", \
(errno)); \
} \
- __btrfs_abort_transaction((trans), (root), __func__, \
+ __btrfs_abort_transaction((trans), __func__, \
__LINE__, (errno)); \
} while (0)
@@ -3600,13 +3631,13 @@ static inline int btrfs_defrag_cancelled(struct btrfs_fs_info *fs_info)
void btrfs_test_destroy_inode(struct inode *inode);
#endif
-static inline int btrfs_test_is_dummy_root(struct btrfs_root *root)
+static inline int btrfs_is_testing(struct btrfs_fs_info *fs_info)
{
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
- if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state)))
+ if (unlikely(test_bit(BTRFS_FS_STATE_DUMMY_FS_INFO,
+ &fs_info->fs_state)))
return 1;
#endif
return 0;
}
-
#endif
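
Note: the leaf/node size macros become equivalent inline helpers (same
arithmetic, but type-checked), plus the new BTRFS_MAX_ITEM_SIZE(). As a
worked example — assuming the common 16KiB nodesize and the packed on-disk
struct sizes (btrfs_header 101 bytes, btrfs_item 25 bytes, btrfs_key_ptr
33 bytes), which are not stated in the patch itself:

    BTRFS_LEAF_DATA_SIZE(root)      /* 16384 - 101 = 16283 bytes */
    BTRFS_MAX_ITEM_SIZE(root)       /* 16283 -  25 = 16258 bytes */
    BTRFS_NODEPTRS_PER_BLOCK(root)  /* 16283 /  33 =   493 ptrs  */
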
diff --git a/fs/btrfs/dedupe.h b/fs/btrfs/dedupe.h
new file mode 100644
index 000000000..83ebfe28d
--- /dev/null
+++ b/fs/btrfs/dedupe.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2016 Fujitsu. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __BTRFS_DEDUPE__
+#define __BTRFS_DEDUPE__
+
+/* later in-band dedupe will expand this struct */
+struct btrfs_dedupe_hash;
+#endif
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index d3aaabbfa..3eeb9cd8c 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -34,7 +34,7 @@ int __init btrfs_delayed_inode_init(void)
delayed_node_cache = kmem_cache_create("btrfs_delayed_node",
sizeof(struct btrfs_delayed_node),
0,
- SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
+ SLAB_MEM_SPREAD,
NULL);
if (!delayed_node_cache)
return -ENOMEM;
@@ -553,7 +553,7 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
dst_rsv = &root->fs_info->delayed_block_rsv;
num_bytes = btrfs_calc_trans_metadata_size(root, 1);
- ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
+ ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
if (!ret) {
trace_btrfs_space_reservation(root->fs_info, "delayed_item",
item->key.objectid,
@@ -598,6 +598,29 @@ static int btrfs_delayed_inode_reserve_metadata(
num_bytes = btrfs_calc_trans_metadata_size(root, 1);
/*
+ * If our block_rsv is the delalloc block reserve then check and see if
+ * we have our extra reservation for updating the inode. If not fall
+ * through and try to reserve space quickly.
+ *
+ * We used to try and steal from the delalloc block rsv or the global
+ * reserve, but we'd steal a full reservation, which isn't kind. We are
+ * here through delalloc which means we've likely just cowed down close
+ * to the leaf that contains the inode, so we would steal less just
+ * doing the fallback inode update, so if we do end up having to steal
+ * from the global block rsv we hopefully only steal one or two blocks
+ * worth which is less likely to hurt us.
+ */
+ if (src_rsv && src_rsv->type == BTRFS_BLOCK_RSV_DELALLOC) {
+ spin_lock(&BTRFS_I(inode)->lock);
+ if (test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
+ &BTRFS_I(inode)->runtime_flags))
+ release = true;
+ else
+ src_rsv = NULL;
+ spin_unlock(&BTRFS_I(inode)->lock);
+ }
+
+ /*
* btrfs_dirty_inode will update the inode under btrfs_join_transaction
* which doesn't reserve space for speed. This is a problem since we
* still need to reserve space for this update, so try to reserve the
@@ -626,51 +649,10 @@ static int btrfs_delayed_inode_reserve_metadata(
num_bytes, 1);
}
return ret;
- } else if (src_rsv->type == BTRFS_BLOCK_RSV_DELALLOC) {
- spin_lock(&BTRFS_I(inode)->lock);
- if (test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
- &BTRFS_I(inode)->runtime_flags)) {
- spin_unlock(&BTRFS_I(inode)->lock);
- release = true;
- goto migrate;
- }
- spin_unlock(&BTRFS_I(inode)->lock);
-
- /* Ok we didn't have space pre-reserved. This shouldn't happen
- * too often but it can happen if we do delalloc to an existing
- * inode which gets dirtied because of the time update, and then
- * isn't touched again until after the transaction commits and
- * then we try to write out the data. First try to be nice and
- * reserve something strictly for us. If not be a pain and try
- * to steal from the delalloc block rsv.
- */
- ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes,
- BTRFS_RESERVE_NO_FLUSH);
- if (!ret)
- goto out;
-
- ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
- if (!ret)
- goto out;
-
- if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
- btrfs_debug(root->fs_info,
- "block rsv migrate returned %d", ret);
- WARN_ON(1);
- }
- /*
- * Ok this is a problem, let's just steal from the global rsv
- * since this really shouldn't happen that often.
- */
- ret = btrfs_block_rsv_migrate(&root->fs_info->global_block_rsv,
- dst_rsv, num_bytes);
- goto out;
}
-migrate:
- ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
+ ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
-out:
/*
* Migrate only takes a reservation, it doesn't touch the size of the
* block_rsv. This is to simplify people who don't normally have things
@@ -1188,7 +1170,7 @@ static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
if (ret) {
btrfs_release_delayed_node(curr_node);
curr_node = NULL;
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
break;
}
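
Note: btrfs_block_rsv_migrate() grows an update_size flag; every caller
converted here passes 1. Judging from these call sites, the flag selects
whether the destination reserve's size is raised along with the bytes moved
into it (an inference, not spelled out in this hunk):

    /* move num_bytes of reservation from src to dst, growing dst->size */
    ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
    if (ret)
            return ret;
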
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 430b3689b..ac02e0414 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -541,7 +541,6 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_head *existing;
struct btrfs_delayed_ref_head *head_ref = NULL;
struct btrfs_delayed_ref_root *delayed_refs;
- struct btrfs_qgroup_extent_record *qexisting;
int count_mod = 1;
int must_insert_reserved = 0;
@@ -606,16 +605,15 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
qrecord->num_bytes = num_bytes;
qrecord->old_roots = NULL;
- qexisting = btrfs_qgroup_insert_dirty_extent(delayed_refs,
- qrecord);
- if (qexisting)
+ if(btrfs_qgroup_insert_dirty_extent_nolock(fs_info,
+ delayed_refs, qrecord))
kfree(qrecord);
}
spin_lock_init(&head_ref->lock);
mutex_init(&head_ref->mutex);
- trace_add_delayed_ref_head(ref, head_ref, action);
+ trace_add_delayed_ref_head(fs_info, ref, head_ref, action);
existing = htree_insert(&delayed_refs->href_root,
&head_ref->href_node);
@@ -682,7 +680,7 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
ref->type = BTRFS_TREE_BLOCK_REF_KEY;
full_ref->level = level;
- trace_add_delayed_tree_ref(ref, full_ref, action);
+ trace_add_delayed_tree_ref(fs_info, ref, full_ref, action);
ret = add_delayed_ref_tail_merge(trans, delayed_refs, head_ref, ref);
@@ -739,7 +737,7 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
full_ref->objectid = owner;
full_ref->offset = offset;
- trace_add_delayed_data_ref(ref, full_ref, action);
+ trace_add_delayed_data_ref(fs_info, ref, full_ref, action);
ret = add_delayed_ref_tail_merge(trans, delayed_refs, head_ref, ref);
@@ -861,33 +859,6 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
return 0;
}
-int btrfs_add_delayed_qgroup_reserve(struct btrfs_fs_info *fs_info,
- struct btrfs_trans_handle *trans,
- u64 ref_root, u64 bytenr, u64 num_bytes)
-{
- struct btrfs_delayed_ref_root *delayed_refs;
- struct btrfs_delayed_ref_head *ref_head;
- int ret = 0;
-
- if (!fs_info->quota_enabled || !is_fstree(ref_root))
- return 0;
-
- delayed_refs = &trans->transaction->delayed_refs;
-
- spin_lock(&delayed_refs->lock);
- ref_head = find_ref_head(&delayed_refs->href_root, bytenr, 0);
- if (!ref_head) {
- ret = -ENOENT;
- goto out;
- }
- WARN_ON(ref_head->qgroup_reserved || ref_head->qgroup_ref_root);
- ref_head->qgroup_ref_root = ref_root;
- ref_head->qgroup_reserved = num_bytes;
-out:
- spin_unlock(&delayed_refs->lock);
- return ret;
-}
-
int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
@@ -940,28 +911,28 @@ int btrfs_delayed_ref_init(void)
btrfs_delayed_ref_head_cachep = kmem_cache_create(
"btrfs_delayed_ref_head",
sizeof(struct btrfs_delayed_ref_head), 0,
- SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
+ SLAB_MEM_SPREAD, NULL);
if (!btrfs_delayed_ref_head_cachep)
goto fail;
btrfs_delayed_tree_ref_cachep = kmem_cache_create(
"btrfs_delayed_tree_ref",
sizeof(struct btrfs_delayed_tree_ref), 0,
- SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
+ SLAB_MEM_SPREAD, NULL);
if (!btrfs_delayed_tree_ref_cachep)
goto fail;
btrfs_delayed_data_ref_cachep = kmem_cache_create(
"btrfs_delayed_data_ref",
sizeof(struct btrfs_delayed_data_ref), 0,
- SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
+ SLAB_MEM_SPREAD, NULL);
if (!btrfs_delayed_data_ref_cachep)
goto fail;
btrfs_delayed_extent_op_cachep = kmem_cache_create(
"btrfs_delayed_extent_op",
sizeof(struct btrfs_delayed_extent_op), 0,
- SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
+ SLAB_MEM_SPREAD, NULL);
if (!btrfs_delayed_extent_op_cachep)
goto fail;
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 5fca9534a..43f362976 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -250,9 +250,6 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
u64 parent, u64 ref_root,
u64 owner, u64 offset, u64 reserved, int action,
struct btrfs_delayed_extent_op *extent_op);
-int btrfs_add_delayed_qgroup_reserve(struct btrfs_fs_info *fs_info,
- struct btrfs_trans_handle *trans,
- u64 ref_root, u64 bytenr, u64 num_bytes);
int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 63ef9cdf0..e9bbff3c0 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -142,7 +142,7 @@ no_valid_dev_replace_entry_found:
* missing
*/
if (!dev_replace->srcdev &&
- !btrfs_test_opt(dev_root, DEGRADED)) {
+ !btrfs_test_opt(dev_root->fs_info, DEGRADED)) {
ret = -EIO;
btrfs_warn(fs_info,
"cannot mount because device replace operation is ongoing and");
@@ -151,7 +151,7 @@ no_valid_dev_replace_entry_found:
src_devid);
}
if (!dev_replace->tgtdev &&
- !btrfs_test_opt(dev_root, DEGRADED)) {
+ !btrfs_test_opt(dev_root->fs_info, DEGRADED)) {
ret = -EIO;
btrfs_warn(fs_info,
"cannot mount because device replace operation is ongoing and");
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 864cf3be0..54bc8c7c6 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -101,7 +101,7 @@ int __init btrfs_end_io_wq_init(void)
btrfs_end_io_wq_cache = kmem_cache_create("btrfs_end_io_wq",
sizeof(struct btrfs_end_io_wq),
0,
- SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
+ SLAB_MEM_SPREAD,
NULL);
if (!btrfs_end_io_wq_cache)
return -ENOMEM;
@@ -124,7 +124,6 @@ struct async_submit_bio {
struct list_head list;
extent_submit_bio_hook_t *submit_bio_start;
extent_submit_bio_hook_t *submit_bio_done;
- int rw;
int mirror_num;
unsigned long bio_flags;
/*
@@ -560,8 +559,29 @@ static noinline int check_leaf(struct btrfs_root *root,
u32 nritems = btrfs_header_nritems(leaf);
int slot;
- if (nritems == 0)
+ if (nritems == 0) {
+ struct btrfs_root *check_root;
+
+ key.objectid = btrfs_header_owner(leaf);
+ key.type = BTRFS_ROOT_ITEM_KEY;
+ key.offset = (u64)-1;
+
+ check_root = btrfs_get_fs_root(root->fs_info, &key, false);
+ /*
+ * The only reason we also check NULL here is that during
+ * open_ctree() some roots has not yet been set up.
+ */
+ if (!IS_ERR_OR_NULL(check_root)) {
+ /* if leaf is the root, then it's fine */
+ if (leaf->start !=
+ btrfs_root_bytenr(&check_root->root_item)) {
+ CORRUPT("non-root leaf's nritems is 0",
+ leaf, root, 0);
+ return -EIO;
+ }
+ }
return 0;
+ }
/* Check the 0 item */
if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) !=
@@ -613,6 +633,19 @@ static noinline int check_leaf(struct btrfs_root *root,
return 0;
}
+static int check_node(struct btrfs_root *root, struct extent_buffer *node)
+{
+ unsigned long nr = btrfs_header_nritems(node);
+
+ if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root)) {
+ btrfs_crit(root->fs_info,
+ "corrupt node: block %llu root %llu nritems %lu",
+ node->start, root->objectid, nr);
+ return -EIO;
+ }
+ return 0;
+}
+
static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
u64 phy_offset, struct page *page,
u64 start, u64 end, int mirror)
@@ -683,6 +716,9 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
ret = -EIO;
}
+ if (found_level > 0 && check_node(root, eb))
+ ret = -EIO;
+
if (!ret)
set_extent_buffer_uptodate(eb);
err:
@@ -727,7 +763,7 @@ static void end_workqueue_bio(struct bio *bio)
fs_info = end_io_wq->info;
end_io_wq->error = bio->bi_error;
- if (bio->bi_rw & REQ_WRITE) {
+ if (bio_op(bio) == REQ_OP_WRITE) {
if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) {
wq = fs_info->endio_meta_write_workers;
func = btrfs_endio_meta_write_helper;
@@ -797,7 +833,7 @@ static void run_one_async_start(struct btrfs_work *work)
int ret;
async = container_of(work, struct async_submit_bio, work);
- ret = async->submit_bio_start(async->inode, async->rw, async->bio,
+ ret = async->submit_bio_start(async->inode, async->bio,
async->mirror_num, async->bio_flags,
async->bio_offset);
if (ret)
@@ -830,9 +866,8 @@ static void run_one_async_done(struct btrfs_work *work)
return;
}
- async->submit_bio_done(async->inode, async->rw, async->bio,
- async->mirror_num, async->bio_flags,
- async->bio_offset);
+ async->submit_bio_done(async->inode, async->bio, async->mirror_num,
+ async->bio_flags, async->bio_offset);
}
static void run_one_async_free(struct btrfs_work *work)
@@ -844,7 +879,7 @@ static void run_one_async_free(struct btrfs_work *work)
}
int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
- int rw, struct bio *bio, int mirror_num,
+ struct bio *bio, int mirror_num,
unsigned long bio_flags,
u64 bio_offset,
extent_submit_bio_hook_t *submit_bio_start,
@@ -857,7 +892,6 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
return -ENOMEM;
async->inode = inode;
- async->rw = rw;
async->bio = bio;
async->mirror_num = mirror_num;
async->submit_bio_start = submit_bio_start;
@@ -873,7 +907,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
atomic_inc(&fs_info->nr_async_submits);
- if (rw & REQ_SYNC)
+ if (bio->bi_opf & REQ_SYNC)
btrfs_set_work_high_priority(&async->work);
btrfs_queue_work(fs_info->workers, &async->work);
@@ -903,9 +937,8 @@ static int btree_csum_one_bio(struct bio *bio)
return ret;
}
-static int __btree_submit_bio_start(struct inode *inode, int rw,
- struct bio *bio, int mirror_num,
- unsigned long bio_flags,
+static int __btree_submit_bio_start(struct inode *inode, struct bio *bio,
+ int mirror_num, unsigned long bio_flags,
u64 bio_offset)
{
/*
@@ -915,7 +948,7 @@ static int __btree_submit_bio_start(struct inode *inode, int rw,
return btree_csum_one_bio(bio);
}
-static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
+static int __btree_submit_bio_done(struct inode *inode, struct bio *bio,
int mirror_num, unsigned long bio_flags,
u64 bio_offset)
{
@@ -925,7 +958,7 @@ static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
* when we're called for a write, we're already in the async
* submission context. Just jump into btrfs_map_bio
*/
- ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1);
+ ret = btrfs_map_bio(BTRFS_I(inode)->root, bio, mirror_num, 1);
if (ret) {
bio->bi_error = ret;
bio_endio(bio);
@@ -944,14 +977,14 @@ static int check_async_write(struct inode *inode, unsigned long bio_flags)
return 1;
}
-static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
+static int btree_submit_bio_hook(struct inode *inode, struct bio *bio,
int mirror_num, unsigned long bio_flags,
u64 bio_offset)
{
int async = check_async_write(inode, bio_flags);
int ret;
- if (!(rw & REQ_WRITE)) {
+ if (bio_op(bio) != REQ_OP_WRITE) {
/*
* called for a read, do the setup so that checksum validation
* can happen in the async kernel threads
@@ -960,21 +993,19 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
bio, BTRFS_WQ_ENDIO_METADATA);
if (ret)
goto out_w_error;
- ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
- mirror_num, 0);
+ ret = btrfs_map_bio(BTRFS_I(inode)->root, bio, mirror_num, 0);
} else if (!async) {
ret = btree_csum_one_bio(bio);
if (ret)
goto out_w_error;
- ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
- mirror_num, 0);
+ ret = btrfs_map_bio(BTRFS_I(inode)->root, bio, mirror_num, 0);
} else {
/*
* kthread helpers are used to submit writes so that
* checksumming can happen in parallel across all CPUs
*/
ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
- inode, rw, bio, mirror_num, 0,
+ inode, bio, mirror_num, 0,
bio_offset,
__btree_submit_bio_start,
__btree_submit_bio_done);
@@ -1146,7 +1177,7 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info,
struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
u64 bytenr)
{
- if (btrfs_test_is_dummy_root(root))
+ if (btrfs_is_testing(root->fs_info))
return alloc_test_extent_buffer(root->fs_info, bytenr,
root->nodesize);
return alloc_extent_buffer(root->fs_info, bytenr);
@@ -1233,6 +1264,7 @@ static void __setup_root(u32 nodesize, u32 sectorsize, u32 stripesize,
struct btrfs_root *root, struct btrfs_fs_info *fs_info,
u64 objectid)
{
+ bool dummy = test_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state);
root->node = NULL;
root->commit_root = NULL;
root->sectorsize = sectorsize;
@@ -1287,14 +1319,14 @@ static void __setup_root(u32 nodesize, u32 sectorsize, u32 stripesize,
root->log_transid = 0;
root->log_transid_committed = -1;
root->last_log_commit = 0;
- if (fs_info)
+ if (!dummy)
extent_io_tree_init(&root->dirty_log_pages,
fs_info->btree_inode->i_mapping);
memset(&root->root_key, 0, sizeof(root->root_key));
memset(&root->root_item, 0, sizeof(root->root_item));
memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
- if (fs_info)
+ if (!dummy)
root->defrag_trans_start = fs_info->generation;
else
root->defrag_trans_start = 0;
@@ -1315,17 +1347,20 @@ static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info,
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
/* Should only be used by the testing infrastructure */
-struct btrfs_root *btrfs_alloc_dummy_root(u32 sectorsize, u32 nodesize)
+struct btrfs_root *btrfs_alloc_dummy_root(struct btrfs_fs_info *fs_info,
+ u32 sectorsize, u32 nodesize)
{
struct btrfs_root *root;
- root = btrfs_alloc_root(NULL, GFP_KERNEL);
+ if (!fs_info)
+ return ERR_PTR(-EINVAL);
+
+ root = btrfs_alloc_root(fs_info, GFP_KERNEL);
if (!root)
return ERR_PTR(-ENOMEM);
/* We don't use the stripesize in selftest, set it as sectorsize */
- __setup_root(nodesize, sectorsize, sectorsize, root, NULL,
+ __setup_root(nodesize, sectorsize, sectorsize, root, fs_info,
BTRFS_ROOT_TREE_OBJECTID);
- set_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state);
root->alloc_bytenr = 0;
return root;
@@ -1600,14 +1635,14 @@ int btrfs_init_fs_root(struct btrfs_root *root)
ret = get_anon_bdev(&root->anon_dev);
if (ret)
- goto free_writers;
+ goto fail;
mutex_lock(&root->objectid_mutex);
ret = btrfs_find_highest_objectid(root,
&root->highest_objectid);
if (ret) {
mutex_unlock(&root->objectid_mutex);
- goto free_root_dev;
+ goto fail;
}
ASSERT(root->highest_objectid <= BTRFS_LAST_FREE_OBJECTID);
@@ -1615,14 +1650,8 @@ int btrfs_init_fs_root(struct btrfs_root *root)
mutex_unlock(&root->objectid_mutex);
return 0;
-
-free_root_dev:
- free_anon_bdev(root->anon_dev);
-free_writers:
- btrfs_free_subvolume_writers(root->subv_writers);
fail:
- kfree(root->free_ino_ctl);
- kfree(root->free_ino_pinned);
+ /* the caller is responsible to call free_fs_root */
return ret;
}
@@ -2317,17 +2346,19 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info,
unsigned int flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND;
fs_info->workers =
- btrfs_alloc_workqueue("worker", flags | WQ_HIGHPRI,
- max_active, 16);
+ btrfs_alloc_workqueue(fs_info, "worker",
+ flags | WQ_HIGHPRI, max_active, 16);
fs_info->delalloc_workers =
- btrfs_alloc_workqueue("delalloc", flags, max_active, 2);
+ btrfs_alloc_workqueue(fs_info, "delalloc",
+ flags, max_active, 2);
fs_info->flush_workers =
- btrfs_alloc_workqueue("flush_delalloc", flags, max_active, 0);
+ btrfs_alloc_workqueue(fs_info, "flush_delalloc",
+ flags, max_active, 0);
fs_info->caching_workers =
- btrfs_alloc_workqueue("cache", flags, max_active, 0);
+ btrfs_alloc_workqueue(fs_info, "cache", flags, max_active, 0);
/*
* a higher idle thresh on the submit workers makes it much more
@@ -2335,41 +2366,48 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info,
* devices
*/
fs_info->submit_workers =
- btrfs_alloc_workqueue("submit", flags,
+ btrfs_alloc_workqueue(fs_info, "submit", flags,
min_t(u64, fs_devices->num_devices,
max_active), 64);
fs_info->fixup_workers =
- btrfs_alloc_workqueue("fixup", flags, 1, 0);
+ btrfs_alloc_workqueue(fs_info, "fixup", flags, 1, 0);
/*
* endios are largely parallel and should have a very
* low idle thresh
*/
fs_info->endio_workers =
- btrfs_alloc_workqueue("endio", flags, max_active, 4);
+ btrfs_alloc_workqueue(fs_info, "endio", flags, max_active, 4);
fs_info->endio_meta_workers =
- btrfs_alloc_workqueue("endio-meta", flags, max_active, 4);
+ btrfs_alloc_workqueue(fs_info, "endio-meta", flags,
+ max_active, 4);
fs_info->endio_meta_write_workers =
- btrfs_alloc_workqueue("endio-meta-write", flags, max_active, 2);
+ btrfs_alloc_workqueue(fs_info, "endio-meta-write", flags,
+ max_active, 2);
fs_info->endio_raid56_workers =
- btrfs_alloc_workqueue("endio-raid56", flags, max_active, 4);
+ btrfs_alloc_workqueue(fs_info, "endio-raid56", flags,
+ max_active, 4);
fs_info->endio_repair_workers =
- btrfs_alloc_workqueue("endio-repair", flags, 1, 0);
+ btrfs_alloc_workqueue(fs_info, "endio-repair", flags, 1, 0);
fs_info->rmw_workers =
- btrfs_alloc_workqueue("rmw", flags, max_active, 2);
+ btrfs_alloc_workqueue(fs_info, "rmw", flags, max_active, 2);
fs_info->endio_write_workers =
- btrfs_alloc_workqueue("endio-write", flags, max_active, 2);
+ btrfs_alloc_workqueue(fs_info, "endio-write", flags,
+ max_active, 2);
fs_info->endio_freespace_worker =
- btrfs_alloc_workqueue("freespace-write", flags, max_active, 0);
+ btrfs_alloc_workqueue(fs_info, "freespace-write", flags,
+ max_active, 0);
fs_info->delayed_workers =
- btrfs_alloc_workqueue("delayed-meta", flags, max_active, 0);
+ btrfs_alloc_workqueue(fs_info, "delayed-meta", flags,
+ max_active, 0);
fs_info->readahead_workers =
- btrfs_alloc_workqueue("readahead", flags, max_active, 2);
+ btrfs_alloc_workqueue(fs_info, "readahead", flags,
+ max_active, 2);
fs_info->qgroup_rescan_workers =
- btrfs_alloc_workqueue("qgroup-rescan", flags, 1, 0);
+ btrfs_alloc_workqueue(fs_info, "qgroup-rescan", flags, 1, 0);
fs_info->extent_workers =
- btrfs_alloc_workqueue("extent-refs", flags,
+ btrfs_alloc_workqueue(fs_info, "extent-refs", flags,
min_t(u64, fs_devices->num_devices,
max_active), 8);
@@ -2624,6 +2662,7 @@ int open_ctree(struct super_block *sb,
atomic_set(&fs_info->qgroup_op_seq, 0);
atomic_set(&fs_info->reada_works_cnt, 0);
atomic64_set(&fs_info->tree_mod_seq, 0);
+ fs_info->fs_frozen = 0;
fs_info->sb = sb;
fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE;
fs_info->metadata_ratio = 0;
@@ -3017,8 +3056,8 @@ retry_root_backup:
if (IS_ERR(fs_info->transaction_kthread))
goto fail_cleaner;
- if (!btrfs_test_opt(tree_root, SSD) &&
- !btrfs_test_opt(tree_root, NOSSD) &&
+ if (!btrfs_test_opt(tree_root->fs_info, SSD) &&
+ !btrfs_test_opt(tree_root->fs_info, NOSSD) &&
!fs_info->fs_devices->rotating) {
btrfs_info(fs_info, "detected SSD devices, enabling SSD mode");
btrfs_set_opt(fs_info->mount_opt, SSD);
@@ -3031,9 +3070,9 @@ retry_root_backup:
btrfs_apply_pending_changes(fs_info);
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
- if (btrfs_test_opt(tree_root, CHECK_INTEGRITY)) {
+ if (btrfs_test_opt(tree_root->fs_info, CHECK_INTEGRITY)) {
ret = btrfsic_mount(tree_root, fs_devices,
- btrfs_test_opt(tree_root,
+ btrfs_test_opt(tree_root->fs_info,
CHECK_INTEGRITY_INCLUDING_EXTENT_DATA) ?
1 : 0,
fs_info->check_integrity_print_mask);
@@ -3049,7 +3088,7 @@ retry_root_backup:
/* do not make disk changes in broken FS or nologreplay is given */
if (btrfs_super_log_root(disk_super) != 0 &&
- !btrfs_test_opt(tree_root, NOLOGREPLAY)) {
+ !btrfs_test_opt(tree_root->fs_info, NOLOGREPLAY)) {
ret = btrfs_replay_log(fs_info, fs_devices);
if (ret) {
err = ret;
@@ -3090,7 +3129,7 @@ retry_root_backup:
if (sb->s_flags & MS_RDONLY)
return 0;
- if (btrfs_test_opt(tree_root, FREE_SPACE_TREE) &&
+ if (btrfs_test_opt(tree_root->fs_info, FREE_SPACE_TREE) &&
!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
btrfs_info(fs_info, "creating free space tree");
ret = btrfs_create_free_space_tree(fs_info);
@@ -3127,7 +3166,7 @@ retry_root_backup:
btrfs_qgroup_rescan_resume(fs_info);
- if (btrfs_test_opt(tree_root, CLEAR_CACHE) &&
+ if (btrfs_test_opt(tree_root->fs_info, CLEAR_CACHE) &&
btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
btrfs_info(fs_info, "clearing free space tree");
ret = btrfs_clear_free_space_tree(fs_info);
@@ -3148,7 +3187,7 @@ retry_root_backup:
close_ctree(tree_root);
return ret;
}
- } else if (btrfs_test_opt(tree_root, RESCAN_UUID_TREE) ||
+ } else if (btrfs_test_opt(tree_root->fs_info, RESCAN_UUID_TREE) ||
fs_info->generation !=
btrfs_super_uuid_tree_generation(disk_super)) {
btrfs_info(fs_info, "checking UUID tree");
@@ -3225,7 +3264,7 @@ fail:
return err;
recovery_tree_root:
- if (!btrfs_test_opt(tree_root, USEBACKUPROOT))
+ if (!btrfs_test_opt(tree_root->fs_info, USEBACKUPROOT))
goto fail_tree_roots;
free_root_pointers(fs_info, 0);
@@ -3419,9 +3458,9 @@ static int write_dev_supers(struct btrfs_device *device,
* to go down lazy.
*/
if (i == 0)
- ret = btrfsic_submit_bh(WRITE_FUA, bh);
+ ret = btrfsic_submit_bh(REQ_OP_WRITE, WRITE_FUA, bh);
else
- ret = btrfsic_submit_bh(WRITE_SYNC, bh);
+ ret = btrfsic_submit_bh(REQ_OP_WRITE, WRITE_SYNC, bh);
if (ret)
errors++;
}
@@ -3485,12 +3524,13 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
bio->bi_end_io = btrfs_end_empty_barrier;
bio->bi_bdev = device->bdev;
+ bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH);
init_completion(&device->flush_wait);
bio->bi_private = &device->flush_wait;
device->flush_bio = bio;
bio_get(bio);
- btrfsic_submit_bio(WRITE_FLUSH, bio);
+ btrfsic_submit_bio(bio);
return 0;
}
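/*
 * Editor's note: a condensed sketch of the submission convention adopted
 * above -- the request op and flags now live in the bio itself via
 * bio_set_op_attrs(), so btrfsic_submit_bio() takes only the bio. The
 * helper name is hypothetical; the calls mirror write_dev_flush().
 */
static void example_submit_flush(struct btrfs_device *device, struct bio *bio)
{
        bio->bi_bdev = device->bdev;
        bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH); /* op rides in the bio */
        bio_get(bio);
        btrfsic_submit_bio(bio); /* no separate rw argument any more */
}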
@@ -3640,7 +3680,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
int total_errors = 0;
u64 flags;
- do_barriers = !btrfs_test_opt(root, NOBARRIER);
+ do_barriers = !btrfs_test_opt(root->fs_info, NOBARRIER);
backup_super_roots(root->fs_info);
sb = root->fs_info->super_for_commit;
@@ -3738,8 +3778,15 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
if (btrfs_root_refs(&root->root_item) == 0)
synchronize_srcu(&fs_info->subvol_srcu);
- if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
+ if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
btrfs_free_log(NULL, root);
+ if (root->reloc_root) {
+ free_extent_buffer(root->reloc_root->node);
+ free_extent_buffer(root->reloc_root->commit_root);
+ btrfs_put_fs_root(root->reloc_root);
+ root->reloc_root = NULL;
+ }
+ }
if (root->free_ino_pinned)
__btrfs_remove_free_space_cache(root->free_ino_pinned);
@@ -3924,7 +3971,7 @@ void close_ctree(struct btrfs_root *root)
iput(fs_info->btree_inode);
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
- if (btrfs_test_opt(root, CHECK_INTEGRITY))
+ if (btrfs_test_opt(root->fs_info, CHECK_INTEGRITY))
btrfsic_unmount(root, fs_info->fs_devices);
#endif
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 355e31f90..f19a982f5 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -92,7 +92,8 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
void btrfs_free_fs_root(struct btrfs_root *root);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
-struct btrfs_root *btrfs_alloc_dummy_root(u32 sectorsize, u32 nodesize);
+struct btrfs_root *btrfs_alloc_dummy_root(struct btrfs_fs_info *fs_info,
+ u32 sectorsize, u32 nodesize);
#endif
/*
@@ -124,7 +125,7 @@ void btrfs_csum_final(u32 crc, char *result);
int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
enum btrfs_wq_endio_type metadata);
int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
- int rw, struct bio *bio, int mirror_num,
+ struct bio *bio, int mirror_num,
unsigned long bio_flags, u64 bio_offset,
extent_submit_bio_hook_t *submit_bio_start,
extent_submit_bio_hook_t *submit_bio_done);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 82b912a29..665da8f66 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -60,21 +60,6 @@ enum {
CHUNK_ALLOC_FORCE = 2,
};
-/*
- * Control how reservations are dealt with.
- *
- * RESERVE_FREE - freeing a reservation.
- * RESERVE_ALLOC - allocating space and we need to update bytes_may_use for
- * ENOSPC accounting
- * RESERVE_ALLOC_NO_ACCOUNT - allocating space and we should not update
- * bytes_may_use as the ENOSPC accounting is done elsewhere
- */
-enum {
- RESERVE_FREE = 0,
- RESERVE_ALLOC = 1,
- RESERVE_ALLOC_NO_ACCOUNT = 2,
-};
-
static int update_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 bytenr,
u64 num_bytes, int alloc);
@@ -104,13 +89,24 @@ static int find_next_key(struct btrfs_path *path, int level,
struct btrfs_key *key);
static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
int dump_block_groups);
-static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
- u64 num_bytes, int reserve,
- int delalloc);
+static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache,
+ u64 ram_bytes, u64 num_bytes, int delalloc);
+static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache,
+ u64 num_bytes, int delalloc);
static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
u64 num_bytes);
int btrfs_pin_extent(struct btrfs_root *root,
u64 bytenr, u64 num_bytes, int reserved);
+static int __reserve_metadata_bytes(struct btrfs_root *root,
+ struct btrfs_space_info *space_info,
+ u64 orig_bytes,
+ enum btrfs_reserve_flush_enum flush);
+static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
+ struct btrfs_space_info *space_info,
+ u64 num_bytes);
+static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info,
+ struct btrfs_space_info *space_info,
+ u64 num_bytes);
static noinline int
block_group_cache_done(struct btrfs_block_group_cache *cache)
@@ -2048,7 +2044,7 @@ int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
*/
btrfs_bio_counter_inc_blocked(root->fs_info);
/* Tell the block device(s) that the sectors can be discarded */
- ret = btrfs_map_block(root->fs_info, REQ_DISCARD,
+ ret = btrfs_map_block(root->fs_info, REQ_OP_DISCARD,
bytenr, &num_bytes, &bbio, 0);
/* Error condition is -ENOMEM */
if (!ret) {
@@ -2170,7 +2166,7 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
path, bytenr, parent, root_objectid,
owner, offset, refs_to_add);
if (ret)
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
out:
btrfs_free_path(path);
return ret;
@@ -2194,7 +2190,7 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
ins.type = BTRFS_EXTENT_ITEM_KEY;
ref = btrfs_delayed_node_to_data_ref(node);
- trace_run_delayed_data_ref(node, ref, node->action);
+ trace_run_delayed_data_ref(root->fs_info, node, ref, node->action);
if (node->type == BTRFS_SHARED_DATA_REF_KEY)
parent = ref->parent;
@@ -2349,7 +2345,7 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
SKINNY_METADATA);
ref = btrfs_delayed_node_to_tree_ref(node);
- trace_run_delayed_tree_ref(node, ref, node->action);
+ trace_run_delayed_tree_ref(root->fs_info, node, ref, node->action);
if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
parent = ref->parent;
@@ -2413,7 +2409,8 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
*/
BUG_ON(extent_op);
head = btrfs_delayed_node_to_head(node);
- trace_run_delayed_ref_head(node, head, node->action);
+ trace_run_delayed_ref_head(root->fs_info, node, head,
+ node->action);
if (insert_reserved) {
btrfs_pin_extent(root, node->bytenr,
@@ -2768,7 +2765,7 @@ u64 btrfs_csum_bytes_to_leaves(struct btrfs_root *root, u64 csum_bytes)
u64 num_csums_per_leaf;
u64 num_csums;
- csum_size = BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item);
+ csum_size = BTRFS_MAX_ITEM_SIZE(root);
num_csums_per_leaf = div64_u64(csum_size,
(u64)btrfs_super_csum_size(root->fs_info->super_copy));
num_csums = div64_u64(csum_bytes, root->sectorsize);
@@ -2960,7 +2957,7 @@ again:
trans->can_flush_pending_bgs = false;
ret = __btrfs_run_delayed_refs(trans, root, count);
if (ret < 0) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
return ret;
}
@@ -3224,7 +3221,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
u64, u64, u64, u64, u64, u64);
- if (btrfs_test_is_dummy_root(root))
+ if (btrfs_is_testing(root->fs_info))
return 0;
ref_root = btrfs_header_owner(buf);
@@ -3419,7 +3416,7 @@ again:
* transaction, this only happens in really bad situations
* anyway.
*/
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out_put;
}
WARN_ON(ret);
@@ -3437,7 +3434,7 @@ again:
spin_lock(&block_group->lock);
if (block_group->cached != BTRFS_CACHE_FINISHED ||
- !btrfs_test_opt(root, SPACE_CACHE)) {
+ !btrfs_test_opt(root->fs_info, SPACE_CACHE)) {
/*
* don't bother trying to write stuff out _if_
* a) we're not cached,
@@ -3490,7 +3487,6 @@ again:
dcs = BTRFS_DC_SETUP;
else if (ret == -ENOSPC)
set_bit(BTRFS_TRANS_CACHE_ENOSPC, &trans->transaction->flags);
- btrfs_free_reserved_data_space(inode, 0, num_pages);
out_put:
iput(inode);
@@ -3514,7 +3510,7 @@ int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
struct btrfs_path *path;
if (list_empty(&cur_trans->dirty_bgs) ||
- !btrfs_test_opt(root, SPACE_CACHE))
+ !btrfs_test_opt(root->fs_info, SPACE_CACHE))
return 0;
path = btrfs_alloc_path();
@@ -3659,7 +3655,7 @@ again:
}
spin_unlock(&cur_trans->dirty_bgs_lock);
} else if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
}
}
@@ -3805,7 +3801,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
cache);
}
if (ret)
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
}
/* if its not on the io list, we need to put the block group */
@@ -3913,6 +3909,7 @@ static const char *alloc_name(u64 flags)
static int update_space_info(struct btrfs_fs_info *info, u64 flags,
u64 total_bytes, u64 bytes_used,
+ u64 bytes_readonly,
struct btrfs_space_info **space_info)
{
struct btrfs_space_info *found;
@@ -3933,8 +3930,11 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
found->disk_total += total_bytes * factor;
found->bytes_used += bytes_used;
found->disk_used += bytes_used * factor;
+ found->bytes_readonly += bytes_readonly;
if (total_bytes > 0)
found->full = 0;
+ space_info_add_new_bytes(info, found, total_bytes -
+ bytes_used - bytes_readonly);
spin_unlock(&found->lock);
*space_info = found;
return 0;
@@ -3960,7 +3960,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
found->disk_used = bytes_used * factor;
found->bytes_pinned = 0;
found->bytes_reserved = 0;
- found->bytes_readonly = 0;
+ found->bytes_readonly = bytes_readonly;
found->bytes_may_use = 0;
found->full = 0;
found->max_extent_size = 0;
@@ -3969,6 +3969,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
found->flush = 0;
init_waitqueue_head(&found->wait);
INIT_LIST_HEAD(&found->ro_bgs);
+ INIT_LIST_HEAD(&found->tickets);
+ INIT_LIST_HEAD(&found->priority_tickets);
ret = kobject_init_and_add(&found->kobj, &space_info_ktype,
info->space_info_kobj, "%s",
@@ -4269,13 +4271,10 @@ int btrfs_check_data_free_space(struct inode *inode, u64 start, u64 len)
if (ret < 0)
return ret;
- /*
- * Use new btrfs_qgroup_reserve_data to reserve precious data space
- *
- * TODO: Find a good method to avoid reserve data space for NOCOW
- * range, but don't impact performance on quota disable case.
- */
+ /* Use new btrfs_qgroup_reserve_data to reserve precious data space. */
ret = btrfs_qgroup_reserve_data(inode, start, len);
+ if (ret)
+ btrfs_free_reserved_data_space_noquota(inode, start, len);
return ret;
}
@@ -4427,7 +4426,7 @@ void check_system_chunk(struct btrfs_trans_handle *trans,
thresh = btrfs_calc_trunc_metadata_size(root, num_devs) +
btrfs_calc_trans_metadata_size(root, 1);
- if (left < thresh && btrfs_test_opt(root, ENOSPC_DEBUG)) {
+ if (left < thresh && btrfs_test_opt(root->fs_info, ENOSPC_DEBUG)) {
btrfs_info(root->fs_info, "left=%llu, need=%llu, flags=%llu",
left, thresh, type);
dump_space_info(info, 0, 0);
@@ -4455,6 +4454,15 @@ void check_system_chunk(struct btrfs_trans_handle *trans,
}
}
+/*
+ * If force is CHUNK_ALLOC_FORCE:
+ * - return 1 if it successfully allocates a chunk,
+ * - return errors including -ENOSPC otherwise.
+ * If force is NOT CHUNK_ALLOC_FORCE:
+ * - return 0 if it doesn't need to allocate a new chunk,
+ * - return 1 if it successfully allocates a chunk,
+ * - return errors including -ENOSPC otherwise.
+ */
static int do_chunk_alloc(struct btrfs_trans_handle *trans,
struct btrfs_root *extent_root, u64 flags, int force)
{
@@ -4470,7 +4478,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
space_info = __find_space_info(extent_root->fs_info, flags);
if (!space_info) {
ret = update_space_info(extent_root->fs_info, flags,
- 0, 0, &space_info);
+ 0, 0, 0, &space_info);
BUG_ON(ret); /* -ENOMEM */
}
BUG_ON(!space_info); /* Logic error */
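/*
 * Editor's note: a hypothetical caller illustrating the return convention
 * documented above do_chunk_alloc(): 1 means a chunk was allocated, 0
 * means none was needed (only possible without CHUNK_ALLOC_FORCE), and
 * negative values including -ENOSPC are errors.
 */
static int example_alloc_chunk(struct btrfs_trans_handle *trans,
                               struct btrfs_root *extent_root, u64 flags)
{
        int ret;

        ret = do_chunk_alloc(trans, extent_root, flags, CHUNK_ALLOC_NO_FORCE);
        if (ret < 0)
                return ret;     /* hard failure, e.g. -ENOSPC */
        /* ret == 1: chunk allocated; ret == 0: existing chunks sufficed */
        return 0;
}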
@@ -4572,7 +4580,7 @@ out:
*/
if (trans->can_flush_pending_bgs &&
trans->chunk_bytes_reserved >= (u64)SZ_2M) {
- btrfs_create_pending_block_groups(trans, trans->root);
+ btrfs_create_pending_block_groups(trans, extent_root);
btrfs_trans_release_chunk_metadata(trans);
}
return ret;
@@ -4582,12 +4590,19 @@ static int can_overcommit(struct btrfs_root *root,
struct btrfs_space_info *space_info, u64 bytes,
enum btrfs_reserve_flush_enum flush)
{
- struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
- u64 profile = btrfs_get_alloc_profile(root, 0);
+ struct btrfs_block_rsv *global_rsv;
+ u64 profile;
u64 space_size;
u64 avail;
u64 used;
+ /* Don't overcommit when in mixed mode. */
+ if (space_info->flags & BTRFS_BLOCK_GROUP_DATA)
+ return 0;
+
+ BUG_ON(root->fs_info == NULL);
+ global_rsv = &root->fs_info->global_block_rsv;
+ profile = btrfs_get_alloc_profile(root, 0);
used = space_info->bytes_used + space_info->bytes_reserved +
space_info->bytes_pinned + space_info->bytes_readonly;
@@ -4739,6 +4754,11 @@ skip_async:
spin_unlock(&space_info->lock);
break;
}
+ if (list_empty(&space_info->tickets) &&
+ list_empty(&space_info->priority_tickets)) {
+ spin_unlock(&space_info->lock);
+ break;
+ }
spin_unlock(&space_info->lock);
loops++;
@@ -4807,13 +4827,11 @@ commit:
return btrfs_commit_transaction(trans, root);
}
-enum flush_state {
- FLUSH_DELAYED_ITEMS_NR = 1,
- FLUSH_DELAYED_ITEMS = 2,
- FLUSH_DELALLOC = 3,
- FLUSH_DELALLOC_WAIT = 4,
- ALLOC_CHUNK = 5,
- COMMIT_TRANS = 6,
+struct reserve_ticket {
+ u64 bytes;
+ int error;
+ struct list_head list;
+ wait_queue_head_t wait;
};
static int flush_space(struct btrfs_root *root,
@@ -4855,7 +4873,7 @@ static int flush_space(struct btrfs_root *root,
btrfs_get_alloc_profile(root, 0),
CHUNK_ALLOC_NO_FORCE);
btrfs_end_transaction(trans, root);
- if (ret == -ENOSPC)
+ if (ret > 0 || ret == -ENOSPC)
ret = 0;
break;
case COMMIT_TRANS:
@@ -4866,6 +4884,8 @@ static int flush_space(struct btrfs_root *root,
break;
}
+ trace_btrfs_flush_space(root->fs_info, space_info->flags, num_bytes,
+ orig_bytes, state, ret);
return ret;
}
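/*
 * Editor's note: a simplified sketch of the ticket lifecycle built on
 * struct reserve_ticket above. A reserver queues a stack ticket on the
 * space_info and sleeps until a flusher either satisfies it (bytes drops
 * to 0) or fails it (error is set). Locking is omitted here as an
 * assumption; __reserve_metadata_bytes() below shows the real flow.
 */
static int example_ticket_wait(struct btrfs_space_info *space_info, u64 bytes)
{
        struct reserve_ticket ticket;

        ticket.bytes = bytes;           /* how much we still need */
        ticket.error = 0;
        init_waitqueue_head(&ticket.wait);
        list_add_tail(&ticket.list, &space_info->tickets);

        wait_event(ticket.wait, ticket.bytes == 0 || ticket.error);
        return ticket.error;            /* 0 on success, -ENOSPC on failure */
}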
@@ -4873,17 +4893,22 @@ static inline u64
btrfs_calc_reclaim_metadata_size(struct btrfs_root *root,
struct btrfs_space_info *space_info)
{
+ struct reserve_ticket *ticket;
u64 used;
u64 expected;
- u64 to_reclaim;
+ u64 to_reclaim = 0;
+
+ list_for_each_entry(ticket, &space_info->tickets, list)
+ to_reclaim += ticket->bytes;
+ list_for_each_entry(ticket, &space_info->priority_tickets, list)
+ to_reclaim += ticket->bytes;
+ if (to_reclaim)
+ return to_reclaim;
to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M);
- spin_lock(&space_info->lock);
if (can_overcommit(root, space_info, to_reclaim,
- BTRFS_RESERVE_FLUSH_ALL)) {
- to_reclaim = 0;
- goto out;
- }
+ BTRFS_RESERVE_FLUSH_ALL))
+ return 0;
used = space_info->bytes_used + space_info->bytes_reserved +
space_info->bytes_pinned + space_info->bytes_readonly +
@@ -4899,14 +4924,11 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_root *root,
to_reclaim = 0;
to_reclaim = min(to_reclaim, space_info->bytes_may_use +
space_info->bytes_reserved);
-out:
- spin_unlock(&space_info->lock);
-
return to_reclaim;
}
static inline int need_do_async_reclaim(struct btrfs_space_info *space_info,
- struct btrfs_fs_info *fs_info, u64 used)
+ struct btrfs_root *root, u64 used)
{
u64 thresh = div_factor_fine(space_info->total_bytes, 98);
@@ -4914,73 +4936,176 @@ static inline int need_do_async_reclaim(struct btrfs_space_info *space_info,
if ((space_info->bytes_used + space_info->bytes_reserved) >= thresh)
return 0;
- return (used >= thresh && !btrfs_fs_closing(fs_info) &&
- !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state));
+ if (!btrfs_calc_reclaim_metadata_size(root, space_info))
+ return 0;
+
+ return (used >= thresh && !btrfs_fs_closing(root->fs_info) &&
+ !test_bit(BTRFS_FS_STATE_REMOUNTING,
+ &root->fs_info->fs_state));
}
-static int btrfs_need_do_async_reclaim(struct btrfs_space_info *space_info,
- struct btrfs_fs_info *fs_info,
- int flush_state)
+static void wake_all_tickets(struct list_head *head)
{
- u64 used;
+ struct reserve_ticket *ticket;
- spin_lock(&space_info->lock);
- /*
- * We run out of space and have not got any free space via flush_space,
- * so don't bother doing async reclaim.
- */
- if (flush_state > COMMIT_TRANS && space_info->full) {
- spin_unlock(&space_info->lock);
- return 0;
- }
-
- used = space_info->bytes_used + space_info->bytes_reserved +
- space_info->bytes_pinned + space_info->bytes_readonly +
- space_info->bytes_may_use;
- if (need_do_async_reclaim(space_info, fs_info, used)) {
- spin_unlock(&space_info->lock);
- return 1;
+ while (!list_empty(head)) {
+ ticket = list_first_entry(head, struct reserve_ticket, list);
+ list_del_init(&ticket->list);
+ ticket->error = -ENOSPC;
+ wake_up(&ticket->wait);
}
- spin_unlock(&space_info->lock);
-
- return 0;
}
+/*
+ * This is for normal flushers; we can wait all goddamned day if we want to. We
+ * will loop and continuously try to flush as long as we are making progress.
+ * We count progress as clearing off tickets each time we have to loop.
+ */
static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
{
struct btrfs_fs_info *fs_info;
struct btrfs_space_info *space_info;
u64 to_reclaim;
int flush_state;
+ int commit_cycles = 0;
+ u64 last_tickets_id;
fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work);
space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
+ spin_lock(&space_info->lock);
to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root,
space_info);
- if (!to_reclaim)
+ if (!to_reclaim) {
+ space_info->flush = 0;
+ spin_unlock(&space_info->lock);
return;
+ }
+ last_tickets_id = space_info->tickets_id;
+ spin_unlock(&space_info->lock);
flush_state = FLUSH_DELAYED_ITEMS_NR;
do {
+ struct reserve_ticket *ticket;
+ int ret;
+
+ ret = flush_space(fs_info->fs_root, space_info, to_reclaim,
+ to_reclaim, flush_state);
+ spin_lock(&space_info->lock);
+ if (list_empty(&space_info->tickets)) {
+ space_info->flush = 0;
+ spin_unlock(&space_info->lock);
+ return;
+ }
+ to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root,
+ space_info);
+ ticket = list_first_entry(&space_info->tickets,
+ struct reserve_ticket, list);
+ if (last_tickets_id == space_info->tickets_id) {
+ flush_state++;
+ } else {
+ last_tickets_id = space_info->tickets_id;
+ flush_state = FLUSH_DELAYED_ITEMS_NR;
+ if (commit_cycles)
+ commit_cycles--;
+ }
+
+ if (flush_state > COMMIT_TRANS) {
+ commit_cycles++;
+ if (commit_cycles > 2) {
+ wake_all_tickets(&space_info->tickets);
+ space_info->flush = 0;
+ } else {
+ flush_state = FLUSH_DELAYED_ITEMS_NR;
+ }
+ }
+ spin_unlock(&space_info->lock);
+ } while (flush_state <= COMMIT_TRANS);
+}
+
+void btrfs_init_async_reclaim_work(struct work_struct *work)
+{
+ INIT_WORK(work, btrfs_async_reclaim_metadata_space);
+}
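/*
 * Editor's note: an illustrative pairing for the init helper above. The
 * work item is set up once and later kicked with queue_work() on the
 * unbound queue, as the reservation paths below do; this wrapper itself
 * is hypothetical.
 */
static void example_arm_reclaim(struct btrfs_fs_info *fs_info)
{
        btrfs_init_async_reclaim_work(&fs_info->async_reclaim_work);
        if (!work_busy(&fs_info->async_reclaim_work))
                queue_work(system_unbound_wq, &fs_info->async_reclaim_work);
}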
+
+static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
+ struct btrfs_space_info *space_info,
+ struct reserve_ticket *ticket)
+{
+ u64 to_reclaim;
+ int flush_state = FLUSH_DELAYED_ITEMS_NR;
+
+ spin_lock(&space_info->lock);
+ to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root,
+ space_info);
+ if (!to_reclaim) {
+ spin_unlock(&space_info->lock);
+ return;
+ }
+ spin_unlock(&space_info->lock);
+
+ do {
flush_space(fs_info->fs_root, space_info, to_reclaim,
to_reclaim, flush_state);
flush_state++;
- if (!btrfs_need_do_async_reclaim(space_info, fs_info,
- flush_state))
+ spin_lock(&space_info->lock);
+ if (ticket->bytes == 0) {
+ spin_unlock(&space_info->lock);
return;
+ }
+ spin_unlock(&space_info->lock);
+
+ /*
+ * Priority flushers can't wait on delalloc without
+ * deadlocking.
+ */
+ if (flush_state == FLUSH_DELALLOC ||
+ flush_state == FLUSH_DELALLOC_WAIT)
+ flush_state = ALLOC_CHUNK;
} while (flush_state < COMMIT_TRANS);
}
-void btrfs_init_async_reclaim_work(struct work_struct *work)
+static int wait_reserve_ticket(struct btrfs_fs_info *fs_info,
+ struct btrfs_space_info *space_info,
+ struct reserve_ticket *ticket, u64 orig_bytes)
+
{
- INIT_WORK(work, btrfs_async_reclaim_metadata_space);
+ DEFINE_WAIT(wait);
+ int ret = 0;
+
+ spin_lock(&space_info->lock);
+ while (ticket->bytes > 0 && ticket->error == 0) {
+ ret = prepare_to_wait_event(&ticket->wait, &wait, TASK_KILLABLE);
+ if (ret) {
+ ret = -EINTR;
+ break;
+ }
+ spin_unlock(&space_info->lock);
+
+ schedule();
+
+ finish_wait(&ticket->wait, &wait);
+ spin_lock(&space_info->lock);
+ }
+ if (!ret)
+ ret = ticket->error;
+ if (!list_empty(&ticket->list))
+ list_del_init(&ticket->list);
+ if (ticket->bytes && ticket->bytes < orig_bytes) {
+ u64 num_bytes = orig_bytes - ticket->bytes;
+ space_info->bytes_may_use -= num_bytes;
+ trace_btrfs_space_reservation(fs_info, "space_info",
+ space_info->flags, num_bytes, 0);
+ }
+ spin_unlock(&space_info->lock);
+
+ return ret;
}
/**
* reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
* @root - the root we're allocating for
- * @block_rsv - the block_rsv we're allocating for
+ * @space_info - the space info we want to allocate from
* @orig_bytes - the number of bytes we want
* @flush - whether or not we can flush to make our reservation
*
@@ -4991,81 +5116,36 @@ void btrfs_init_async_reclaim_work(struct work_struct *work)
* regain reservations will be made and this will fail if there is not enough
* space already.
*/
-static int reserve_metadata_bytes(struct btrfs_root *root,
- struct btrfs_block_rsv *block_rsv,
- u64 orig_bytes,
- enum btrfs_reserve_flush_enum flush)
+static int __reserve_metadata_bytes(struct btrfs_root *root,
+ struct btrfs_space_info *space_info,
+ u64 orig_bytes,
+ enum btrfs_reserve_flush_enum flush)
{
- struct btrfs_space_info *space_info = block_rsv->space_info;
+ struct reserve_ticket ticket;
u64 used;
- u64 num_bytes = orig_bytes;
- int flush_state = FLUSH_DELAYED_ITEMS_NR;
int ret = 0;
- bool flushing = false;
-
-again:
- ret = 0;
- spin_lock(&space_info->lock);
- /*
- * We only want to wait if somebody other than us is flushing and we
- * are actually allowed to flush all things.
- */
- while (flush == BTRFS_RESERVE_FLUSH_ALL && !flushing &&
- space_info->flush) {
- spin_unlock(&space_info->lock);
- /*
- * If we have a trans handle we can't wait because the flusher
- * may have to commit the transaction, which would mean we would
- * deadlock since we are waiting for the flusher to finish, but
- * hold the current transaction open.
- */
- if (current->journal_info)
- return -EAGAIN;
- ret = wait_event_killable(space_info->wait, !space_info->flush);
- /* Must have been killed, return */
- if (ret)
- return -EINTR;
- spin_lock(&space_info->lock);
- }
+ ASSERT(orig_bytes);
+ ASSERT(!current->journal_info || flush != BTRFS_RESERVE_FLUSH_ALL);
+ spin_lock(&space_info->lock);
ret = -ENOSPC;
used = space_info->bytes_used + space_info->bytes_reserved +
space_info->bytes_pinned + space_info->bytes_readonly +
space_info->bytes_may_use;
/*
- * The idea here is that we've not already over-reserved the block group
- * then we can go ahead and save our reservation first and then start
- * flushing if we need to. Otherwise if we've already overcommitted
- * lets start flushing stuff first and then come back and try to make
- * our reservation.
+ * If we have enough space then hooray, make our reservation and carry
+	 * on. If not, see if we can overcommit, and if we can, hooray, carry on.
+	 * If not, things get more complicated.
*/
- if (used <= space_info->total_bytes) {
- if (used + orig_bytes <= space_info->total_bytes) {
- space_info->bytes_may_use += orig_bytes;
- trace_btrfs_space_reservation(root->fs_info,
- "space_info", space_info->flags, orig_bytes, 1);
- ret = 0;
- } else {
- /*
- * Ok set num_bytes to orig_bytes since we aren't
- * overocmmitted, this way we only try and reclaim what
- * we need.
- */
- num_bytes = orig_bytes;
- }
- } else {
- /*
- * Ok we're over committed, set num_bytes to the overcommitted
- * amount plus the amount of bytes that we need for this
- * reservation.
- */
- num_bytes = used - space_info->total_bytes +
- (orig_bytes * 2);
- }
-
- if (ret && can_overcommit(root, space_info, orig_bytes, flush)) {
+ if (used + orig_bytes <= space_info->total_bytes) {
+ space_info->bytes_may_use += orig_bytes;
+ trace_btrfs_space_reservation(root->fs_info, "space_info",
+ space_info->flags, orig_bytes,
+ 1);
+ ret = 0;
+ } else if (can_overcommit(root, space_info, orig_bytes, flush)) {
space_info->bytes_may_use += orig_bytes;
trace_btrfs_space_reservation(root->fs_info, "space_info",
space_info->flags, orig_bytes,
@@ -5074,16 +5154,31 @@ again:
}
/*
- * Couldn't make our reservation, save our place so while we're trying
- * to reclaim space we can actually use it instead of somebody else
- * stealing it from us.
+ * If we couldn't make a reservation then setup our reservation ticket
+ * and kick the async worker if it's not already running.
*
- * We make the other tasks wait for the flush only when we can flush
- * all things.
+ * If we are a priority flusher then we just need to add our ticket to
+ * the list and we will do our own flushing further down.
*/
if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
- flushing = true;
- space_info->flush = 1;
+ ticket.bytes = orig_bytes;
+ ticket.error = 0;
+ init_waitqueue_head(&ticket.wait);
+ if (flush == BTRFS_RESERVE_FLUSH_ALL) {
+ list_add_tail(&ticket.list, &space_info->tickets);
+ if (!space_info->flush) {
+ space_info->flush = 1;
+ trace_btrfs_trigger_flush(root->fs_info,
+ space_info->flags,
+ orig_bytes, flush,
+ "enospc");
+ queue_work(system_unbound_wq,
+ &root->fs_info->async_reclaim_work);
+ }
+ } else {
+ list_add_tail(&ticket.list,
+ &space_info->priority_tickets);
+ }
} else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
used += orig_bytes;
/*
@@ -5092,39 +5187,67 @@ again:
* the async reclaim as we will panic.
*/
if (!root->fs_info->log_root_recovering &&
- need_do_async_reclaim(space_info, root->fs_info, used) &&
- !work_busy(&root->fs_info->async_reclaim_work))
+ need_do_async_reclaim(space_info, root, used) &&
+ !work_busy(&root->fs_info->async_reclaim_work)) {
+ trace_btrfs_trigger_flush(root->fs_info,
+ space_info->flags,
+ orig_bytes, flush,
+ "preempt");
queue_work(system_unbound_wq,
&root->fs_info->async_reclaim_work);
+ }
}
spin_unlock(&space_info->lock);
-
if (!ret || flush == BTRFS_RESERVE_NO_FLUSH)
- goto out;
+ return ret;
- ret = flush_space(root, space_info, num_bytes, orig_bytes,
- flush_state);
- flush_state++;
+ if (flush == BTRFS_RESERVE_FLUSH_ALL)
+ return wait_reserve_ticket(root->fs_info, space_info, &ticket,
+ orig_bytes);
- /*
- * If we are FLUSH_LIMIT, we can not flush delalloc, or the deadlock
- * would happen. So skip delalloc flush.
- */
- if (flush == BTRFS_RESERVE_FLUSH_LIMIT &&
- (flush_state == FLUSH_DELALLOC ||
- flush_state == FLUSH_DELALLOC_WAIT))
- flush_state = ALLOC_CHUNK;
+ ret = 0;
+ priority_reclaim_metadata_space(root->fs_info, space_info, &ticket);
+ spin_lock(&space_info->lock);
+ if (ticket.bytes) {
+ if (ticket.bytes < orig_bytes) {
+ u64 num_bytes = orig_bytes - ticket.bytes;
+ space_info->bytes_may_use -= num_bytes;
+ trace_btrfs_space_reservation(root->fs_info,
+ "space_info", space_info->flags,
+ num_bytes, 0);
- if (!ret)
- goto again;
- else if (flush == BTRFS_RESERVE_FLUSH_LIMIT &&
- flush_state < COMMIT_TRANS)
- goto again;
- else if (flush == BTRFS_RESERVE_FLUSH_ALL &&
- flush_state <= COMMIT_TRANS)
- goto again;
+ }
+ list_del_init(&ticket.list);
+ ret = -ENOSPC;
+ }
+ spin_unlock(&space_info->lock);
+ ASSERT(list_empty(&ticket.list));
+ return ret;
+}
-out:
+/**
+ * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
+ * @root - the root we're allocating for
+ * @block_rsv - the block_rsv we're allocating for
+ * @orig_bytes - the number of bytes we want
+ * @flush - whether or not we can flush to make our reservation
+ *
+ * This will reserve orig_bytes number of bytes from the space info associated
+ * with the block_rsv. If there is not enough space it will make an attempt to
+ * flush out space to make room. It will do this by flushing delalloc if
+ * possible or committing the transaction. If flush is 0 then no attempts to
+ * regain reservations will be made and this will fail if there is not enough
+ * space already.
+ */
+static int reserve_metadata_bytes(struct btrfs_root *root,
+ struct btrfs_block_rsv *block_rsv,
+ u64 orig_bytes,
+ enum btrfs_reserve_flush_enum flush)
+{
+ int ret;
+
+ ret = __reserve_metadata_bytes(root, block_rsv->space_info, orig_bytes,
+ flush);
if (ret == -ENOSPC &&
unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) {
struct btrfs_block_rsv *global_rsv =
@@ -5137,13 +5260,8 @@ out:
if (ret == -ENOSPC)
trace_btrfs_space_reservation(root->fs_info,
"space_info:enospc",
- space_info->flags, orig_bytes, 1);
- if (flushing) {
- spin_lock(&space_info->lock);
- space_info->flush = 0;
- wake_up_all(&space_info->wait);
- spin_unlock(&space_info->lock);
- }
+ block_rsv->space_info->flags,
+ orig_bytes, 1);
return ret;
}
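/*
 * Editor's note: a hypothetical call site showing the three flush modes
 * used with the reservation paths above. BTRFS_RESERVE_FLUSH_ALL may
 * block on a ticket, BTRFS_RESERVE_FLUSH_LIMIT flushes but skips
 * delalloc to avoid deadlocks, and BTRFS_RESERVE_NO_FLUSH fails fast.
 */
static int example_reserve(struct btrfs_root *root,
                           struct btrfs_block_rsv *rsv, u64 bytes)
{
        enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;

        if (current->journal_info)      /* transaction open: limited flushing */
                flush = BTRFS_RESERVE_FLUSH_LIMIT;
        return reserve_metadata_bytes(root, rsv, bytes, flush);
}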
@@ -5219,6 +5337,110 @@ int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
return 0;
}
+/*
+ * This is for space we already have accounted in space_info->bytes_may_use, so
+ * basically when we're returning space from a block_rsv.
+ */
+static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info,
+ struct btrfs_space_info *space_info,
+ u64 num_bytes)
+{
+ struct reserve_ticket *ticket;
+ struct list_head *head;
+ u64 used;
+ enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_NO_FLUSH;
+ bool check_overcommit = false;
+
+ spin_lock(&space_info->lock);
+ head = &space_info->priority_tickets;
+
+ /*
+ * If we are over our limit then we need to check and see if we can
+ * overcommit, and if we can't then we just need to free up our space
+ * and not satisfy any requests.
+ */
+ used = space_info->bytes_used + space_info->bytes_reserved +
+ space_info->bytes_pinned + space_info->bytes_readonly +
+ space_info->bytes_may_use;
+ if (used - num_bytes >= space_info->total_bytes)
+ check_overcommit = true;
+again:
+ while (!list_empty(head) && num_bytes) {
+ ticket = list_first_entry(head, struct reserve_ticket,
+ list);
+ /*
+ * We use 0 bytes because this space is already reserved, so
+ * adding the ticket space would be a double count.
+ */
+ if (check_overcommit &&
+ !can_overcommit(fs_info->extent_root, space_info, 0,
+ flush))
+ break;
+ if (num_bytes >= ticket->bytes) {
+ list_del_init(&ticket->list);
+ num_bytes -= ticket->bytes;
+ ticket->bytes = 0;
+ space_info->tickets_id++;
+ wake_up(&ticket->wait);
+ } else {
+ ticket->bytes -= num_bytes;
+ num_bytes = 0;
+ }
+ }
+
+ if (num_bytes && head == &space_info->priority_tickets) {
+ head = &space_info->tickets;
+ flush = BTRFS_RESERVE_FLUSH_ALL;
+ goto again;
+ }
+ space_info->bytes_may_use -= num_bytes;
+ trace_btrfs_space_reservation(fs_info, "space_info",
+ space_info->flags, num_bytes, 0);
+ spin_unlock(&space_info->lock);
+}
+
+/*
+ * This is for newly allocated space that isn't accounted in
+ * space_info->bytes_may_use yet. So if we allocate a chunk or unpin an extent
+ * we use this helper.
+ */
+static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
+ struct btrfs_space_info *space_info,
+ u64 num_bytes)
+{
+ struct reserve_ticket *ticket;
+ struct list_head *head = &space_info->priority_tickets;
+
+again:
+ while (!list_empty(head) && num_bytes) {
+ ticket = list_first_entry(head, struct reserve_ticket,
+ list);
+ if (num_bytes >= ticket->bytes) {
+ trace_btrfs_space_reservation(fs_info, "space_info",
+ space_info->flags,
+ ticket->bytes, 1);
+ list_del_init(&ticket->list);
+ num_bytes -= ticket->bytes;
+ space_info->bytes_may_use += ticket->bytes;
+ ticket->bytes = 0;
+ space_info->tickets_id++;
+ wake_up(&ticket->wait);
+ } else {
+ trace_btrfs_space_reservation(fs_info, "space_info",
+ space_info->flags,
+ num_bytes, 1);
+ space_info->bytes_may_use += num_bytes;
+ ticket->bytes -= num_bytes;
+ num_bytes = 0;
+ }
+ }
+
+ if (num_bytes && head == &space_info->priority_tickets) {
+ head = &space_info->tickets;
+ goto again;
+ }
+}
+
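/*
 * Editor's note: a sketch of when each helper above applies. Space coming
 * back from a block_rsv is already counted in bytes_may_use, so it goes
 * through space_info_add_old_bytes(); space from a fresh chunk or an
 * unpinned extent is not yet counted, so it goes through
 * space_info_add_new_bytes(). This wrapper is illustrative only.
 */
static void example_return_space(struct btrfs_fs_info *fs_info,
                                 struct btrfs_space_info *space_info,
                                 u64 bytes, bool already_accounted)
{
        if (already_accounted)          /* e.g. releasing a reservation */
                space_info_add_old_bytes(fs_info, space_info, bytes);
        else                            /* e.g. unpinning after a commit */
                space_info_add_new_bytes(fs_info, space_info, bytes);
}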
static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *block_rsv,
struct btrfs_block_rsv *dest, u64 num_bytes)
@@ -5253,18 +5475,15 @@ static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
}
spin_unlock(&dest->lock);
}
- if (num_bytes) {
- spin_lock(&space_info->lock);
- space_info->bytes_may_use -= num_bytes;
- trace_btrfs_space_reservation(fs_info, "space_info",
- space_info->flags, num_bytes, 0);
- spin_unlock(&space_info->lock);
- }
+ if (num_bytes)
+ space_info_add_old_bytes(fs_info, space_info,
+ num_bytes);
}
}
-static int block_rsv_migrate_bytes(struct btrfs_block_rsv *src,
- struct btrfs_block_rsv *dst, u64 num_bytes)
+int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src,
+ struct btrfs_block_rsv *dst, u64 num_bytes,
+ int update_size)
{
int ret;
@@ -5272,7 +5491,7 @@ static int block_rsv_migrate_bytes(struct btrfs_block_rsv *src,
if (ret)
return ret;
- block_rsv_add_bytes(dst, num_bytes, 1);
+ block_rsv_add_bytes(dst, num_bytes, update_size);
return 0;
}
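/*
 * Editor's note: btrfs_block_rsv_migrate() is now exported and takes an
 * update_size flag; passing 1 preserves the old behaviour of growing the
 * destination's size along with its reserved bytes, as the orphan and
 * subvolume paths below do. This top-up helper is a hypothetical example.
 */
static int example_topup_from_global(struct btrfs_fs_info *fs_info,
                                     struct btrfs_block_rsv *rsv, u64 bytes)
{
        /* move bytes out of the global rsv, growing rsv->size as well */
        return btrfs_block_rsv_migrate(&fs_info->global_block_rsv, rsv,
                                       bytes, 1);
}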
@@ -5379,13 +5598,6 @@ int btrfs_block_rsv_refill(struct btrfs_root *root,
return ret;
}
-int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
- struct btrfs_block_rsv *dst_rsv,
- u64 num_bytes)
-{
- return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
-}
-
void btrfs_block_rsv_release(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
u64 num_bytes)
@@ -5398,48 +5610,21 @@ void btrfs_block_rsv_release(struct btrfs_root *root,
num_bytes);
}
-/*
- * helper to calculate size of global block reservation.
- * the desired value is sum of space used by extent tree,
- * checksum tree and root tree
- */
-static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
-{
- struct btrfs_space_info *sinfo;
- u64 num_bytes;
- u64 meta_used;
- u64 data_used;
- int csum_size = btrfs_super_csum_size(fs_info->super_copy);
-
- sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA);
- spin_lock(&sinfo->lock);
- data_used = sinfo->bytes_used;
- spin_unlock(&sinfo->lock);
-
- sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
- spin_lock(&sinfo->lock);
- if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA)
- data_used = 0;
- meta_used = sinfo->bytes_used;
- spin_unlock(&sinfo->lock);
-
- num_bytes = (data_used >> fs_info->sb->s_blocksize_bits) *
- csum_size * 2;
- num_bytes += div_u64(data_used + meta_used, 50);
-
- if (num_bytes * 3 > meta_used)
- num_bytes = div_u64(meta_used, 3);
-
- return ALIGN(num_bytes, fs_info->extent_root->nodesize << 10);
-}
-
static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
{
struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
struct btrfs_space_info *sinfo = block_rsv->space_info;
u64 num_bytes;
- num_bytes = calc_global_metadata_size(fs_info);
+ /*
+ * The global block rsv is based on the size of the extent tree, the
+ * checksum tree and the root tree. If the fs is empty we want to set
+ * it to a minimal amount for safety.
+ */
+ num_bytes = btrfs_root_used(&fs_info->extent_root->root_item) +
+ btrfs_root_used(&fs_info->csum_root->root_item) +
+ btrfs_root_used(&fs_info->tree_root->root_item);
+ num_bytes = max_t(u64, num_bytes, SZ_16M);
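+	/*
+	 * Editor's note: illustrative arithmetic for the sizing above --
+	 * e.g. extent tree 10M + csum tree 3M + root tree 1M = 14M is
+	 * clamped up to the SZ_16M floor, while 40M of root usage would
+	 * be used as-is. The figures are made up.
+	 */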
spin_lock(&sinfo->lock);
spin_lock(&block_rsv->lock);
@@ -5537,7 +5722,7 @@ void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
*/
void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans)
{
- struct btrfs_fs_info *fs_info = trans->root->fs_info;
+ struct btrfs_fs_info *fs_info = trans->fs_info;
if (!trans->chunk_bytes_reserved)
return;
@@ -5554,7 +5739,13 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
struct inode *inode)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root);
+ /*
+ * We always use trans->block_rsv here as we will have reserved space
+ * for our orphan when starting the transaction, using get_block_rsv()
+ * here will sometimes make us choose the wrong block rsv as we could be
+ * doing a reloc inode for a non refcounted root.
+ */
+ struct btrfs_block_rsv *src_rsv = trans->block_rsv;
struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv;
/*
@@ -5565,7 +5756,7 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
trace_btrfs_space_reservation(root->fs_info, "orphan",
btrfs_ino(inode), num_bytes, 1);
- return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
+ return btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
}
void btrfs_orphan_release_metadata(struct inode *inode)
@@ -5620,7 +5811,7 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
BTRFS_RESERVE_FLUSH_ALL);
if (ret == -ENOSPC && use_global_rsv)
- ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes);
+ ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes, 1);
if (ret && *qgroup_reserved)
btrfs_qgroup_free_meta(root, *qgroup_reserved);
@@ -5730,21 +5921,26 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
u64 to_reserve = 0;
u64 csum_bytes;
unsigned nr_extents = 0;
- int extra_reserve = 0;
enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
int ret = 0;
bool delalloc_lock = true;
u64 to_free = 0;
unsigned dropped;
+ bool release_extra = false;
/* If we are a free space inode we need to not flush since we will be in
* the middle of a transaction commit. We also don't need the delalloc
* mutex since we won't race with anybody. We need this mostly to make
* lockdep shut its filthy mouth.
+ *
+ * If we have a transaction open (can happen if we call truncate_block
+ * from truncate), then we need FLUSH_LIMIT so we don't deadlock.
*/
if (btrfs_is_free_space_inode(inode)) {
flush = BTRFS_RESERVE_NO_FLUSH;
delalloc_lock = false;
+ } else if (current->journal_info) {
+ flush = BTRFS_RESERVE_FLUSH_LIMIT;
}
if (flush != BTRFS_RESERVE_NO_FLUSH &&
@@ -5761,24 +5957,15 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
BTRFS_MAX_EXTENT_SIZE - 1,
BTRFS_MAX_EXTENT_SIZE);
BTRFS_I(inode)->outstanding_extents += nr_extents;
- nr_extents = 0;
+ nr_extents = 0;
if (BTRFS_I(inode)->outstanding_extents >
BTRFS_I(inode)->reserved_extents)
- nr_extents = BTRFS_I(inode)->outstanding_extents -
+ nr_extents += BTRFS_I(inode)->outstanding_extents -
BTRFS_I(inode)->reserved_extents;
- /*
- * Add an item to reserve for updating the inode when we complete the
- * delalloc io.
- */
- if (!test_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
- &BTRFS_I(inode)->runtime_flags)) {
- nr_extents++;
- extra_reserve = 1;
- }
-
- to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
+ /* We always want to reserve a slot for updating the inode. */
+ to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents + 1);
to_reserve += calc_csum_metadata_size(inode, num_bytes, 1);
csum_bytes = BTRFS_I(inode)->csum_bytes;
spin_unlock(&BTRFS_I(inode)->lock);
@@ -5790,17 +5977,17 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
goto out_fail;
}
- ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush);
+ ret = btrfs_block_rsv_add(root, block_rsv, to_reserve, flush);
if (unlikely(ret)) {
btrfs_qgroup_free_meta(root, nr_extents * root->nodesize);
goto out_fail;
}
spin_lock(&BTRFS_I(inode)->lock);
- if (extra_reserve) {
- set_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
- &BTRFS_I(inode)->runtime_flags);
- nr_extents--;
+ if (test_and_set_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
+ &BTRFS_I(inode)->runtime_flags)) {
+ to_reserve -= btrfs_calc_trans_metadata_size(root, 1);
+ release_extra = true;
}
BTRFS_I(inode)->reserved_extents += nr_extents;
spin_unlock(&BTRFS_I(inode)->lock);
@@ -5811,8 +5998,10 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
if (to_reserve)
trace_btrfs_space_reservation(root->fs_info, "delalloc",
btrfs_ino(inode), to_reserve, 1);
- block_rsv_add_bytes(block_rsv, to_reserve, 1);
-
+ if (release_extra)
+ btrfs_block_rsv_release(root, block_rsv,
+ btrfs_calc_trans_metadata_size(root,
+ 1));
return 0;
out_fail:
@@ -5904,7 +6093,7 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
if (dropped > 0)
to_free += btrfs_calc_trans_metadata_size(root, dropped);
- if (btrfs_test_is_dummy_root(root))
+ if (btrfs_is_testing(root->fs_info))
return;
trace_btrfs_space_reservation(root->fs_info, "delalloc",
@@ -6019,7 +6208,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
spin_lock(&cache->space_info->lock);
spin_lock(&cache->lock);
- if (btrfs_test_opt(root, SPACE_CACHE) &&
+ if (btrfs_test_opt(root->fs_info, SPACE_CACHE) &&
cache->disk_cache_state < BTRFS_DC_CLEAR)
cache->disk_cache_state = BTRFS_DC_CLEAR;
@@ -6044,6 +6233,9 @@ static int update_block_group(struct btrfs_trans_handle *trans,
spin_unlock(&cache->lock);
spin_unlock(&cache->space_info->lock);
+ trace_btrfs_space_reservation(root->fs_info, "pinned",
+ cache->space_info->flags,
+ num_bytes, 1);
set_extent_dirty(info->pinned_extents,
bytenr, bytenr + num_bytes - 1,
GFP_NOFS | __GFP_NOFAIL);
@@ -6118,10 +6310,10 @@ static int pin_down_extent(struct btrfs_root *root,
spin_unlock(&cache->lock);
spin_unlock(&cache->space_info->lock);
+ trace_btrfs_space_reservation(root->fs_info, "pinned",
+ cache->space_info->flags, num_bytes, 1);
set_extent_dirty(root->fs_info->pinned_extents, bytenr,
bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
- if (reserved)
- trace_btrfs_reserved_extent_free(root, bytenr, num_bytes);
return 0;
}
@@ -6297,19 +6489,15 @@ void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg)
}
/**
- * btrfs_update_reserved_bytes - update the block_group and space info counters
+ * btrfs_add_reserved_bytes - update the block_group and space info counters
* @cache: The cache we are manipulating
+ * @ram_bytes: The number of bytes of file content, which will be the same
+ * as @num_bytes except on the compression path.
* @num_bytes: The number of bytes in question
- * @reserve: One of the reservation enums
* @delalloc: The blocks are allocated for the delalloc write
*
- * This is called by the allocator when it reserves space, or by somebody who is
- * freeing space that was never actually used on disk. For example if you
- * reserve some space for a new leaf in transaction A and before transaction A
- * commits you free that leaf, you call this with reserve set to 0 in order to
- * clear the reservation.
- *
- * Metadata reservations should be called with RESERVE_ALLOC so we do the proper
+ * This is called by the allocator when it reserves space. Metadata
+ * reservations are accounted for here directly so we do the proper
* ENOSPC accounting. For data we handle the reservation through clearing the
* delalloc bits in the io_tree. We have to do this since we could end up
* allocating less disk space for the amount of data we have reserved in the
@@ -6319,44 +6507,63 @@ void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg)
* make the reservation and return -EAGAIN, otherwise this function always
* succeeds.
*/
-static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
- u64 num_bytes, int reserve, int delalloc)
+static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache,
+ u64 ram_bytes, u64 num_bytes, int delalloc)
{
struct btrfs_space_info *space_info = cache->space_info;
int ret = 0;
spin_lock(&space_info->lock);
spin_lock(&cache->lock);
- if (reserve != RESERVE_FREE) {
- if (cache->ro) {
- ret = -EAGAIN;
- } else {
- cache->reserved += num_bytes;
- space_info->bytes_reserved += num_bytes;
- if (reserve == RESERVE_ALLOC) {
- trace_btrfs_space_reservation(cache->fs_info,
- "space_info", space_info->flags,
- num_bytes, 0);
- space_info->bytes_may_use -= num_bytes;
- }
-
- if (delalloc)
- cache->delalloc_bytes += num_bytes;
- }
+ if (cache->ro) {
+ ret = -EAGAIN;
} else {
- if (cache->ro)
- space_info->bytes_readonly += num_bytes;
- cache->reserved -= num_bytes;
- space_info->bytes_reserved -= num_bytes;
+ cache->reserved += num_bytes;
+ space_info->bytes_reserved += num_bytes;
+ trace_btrfs_space_reservation(cache->fs_info,
+ "space_info", space_info->flags,
+ ram_bytes, 0);
+ space_info->bytes_may_use -= ram_bytes;
if (delalloc)
- cache->delalloc_bytes -= num_bytes;
+ cache->delalloc_bytes += num_bytes;
}
spin_unlock(&cache->lock);
spin_unlock(&space_info->lock);
return ret;
}
+/**
+ * btrfs_free_reserved_bytes - update the block_group and space info counters
+ * @cache: The cache we are manipulating
+ * @num_bytes: The number of bytes in question
+ * @delalloc: The blocks are allocated for the delalloc write
+ *
+ * This is called by somebody who is freeing space that was never actually used
+ * on disk. For example if you reserve some space for a new leaf in transaction
+ * A and before transaction A commits you free that leaf, you call this
+ * helper to clear the reservation.
+ */
+
+static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache,
+ u64 num_bytes, int delalloc)
+{
+ struct btrfs_space_info *space_info = cache->space_info;
+ int ret = 0;
+
+ spin_lock(&space_info->lock);
+ spin_lock(&cache->lock);
+ if (cache->ro)
+ space_info->bytes_readonly += num_bytes;
+ cache->reserved -= num_bytes;
+ space_info->bytes_reserved -= num_bytes;
+
+ if (delalloc)
+ cache->delalloc_bytes -= num_bytes;
+ spin_unlock(&cache->lock);
+ spin_unlock(&space_info->lock);
+ return ret;
+}
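/*
 * Editor's note: a sketch of how the split helpers above pair up.
 * btrfs_add_reserved_bytes() is charged at allocation time (ram_bytes
 * covering the uncompressed size), and btrfs_free_reserved_bytes()
 * undoes it when the space is never used on disk. The error path here
 * is an illustrative assumption.
 */
static int example_reserve_cycle(struct btrfs_block_group_cache *cache,
                                 u64 ram_bytes, u64 num_bytes)
{
        int ret;

        ret = btrfs_add_reserved_bytes(cache, ram_bytes, num_bytes, 0);
        if (ret)
                return ret;     /* -EAGAIN if the block group went read-only */
        /* ... the allocation fell through, give the reservation back ... */
        btrfs_free_reserved_bytes(cache, num_bytes, 0);
        return 0;
}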
void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
@@ -6398,7 +6605,7 @@ fetch_cluster_info(struct btrfs_root *root, struct btrfs_space_info *space_info,
u64 *empty_cluster)
{
struct btrfs_free_cluster *ret = NULL;
- bool ssd = btrfs_test_opt(root, SSD);
+ bool ssd = btrfs_test_opt(root->fs_info, SSD);
*empty_cluster = 0;
if (btrfs_mixed_space_info(space_info))
@@ -6476,6 +6683,9 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end,
spin_lock(&cache->lock);
cache->pinned -= len;
space_info->bytes_pinned -= len;
+
+ trace_btrfs_space_reservation(fs_info, "pinned",
+ space_info->flags, len, 0);
space_info->max_extent_size = 0;
percpu_counter_add(&space_info->total_bytes_pinned, -len);
if (cache->ro) {
@@ -6483,17 +6693,29 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end,
readonly = true;
}
spin_unlock(&cache->lock);
- if (!readonly && global_rsv->space_info == space_info) {
+ if (!readonly && return_free_space &&
+ global_rsv->space_info == space_info) {
+ u64 to_add = len;
+ WARN_ON(!return_free_space);
spin_lock(&global_rsv->lock);
if (!global_rsv->full) {
- len = min(len, global_rsv->size -
- global_rsv->reserved);
- global_rsv->reserved += len;
- space_info->bytes_may_use += len;
+ to_add = min(len, global_rsv->size -
+ global_rsv->reserved);
+ global_rsv->reserved += to_add;
+ space_info->bytes_may_use += to_add;
if (global_rsv->reserved >= global_rsv->size)
global_rsv->full = 1;
+ trace_btrfs_space_reservation(fs_info,
+ "space_info",
+ space_info->flags,
+ to_add, 1);
+ len -= to_add;
}
spin_unlock(&global_rsv->lock);
+ /* Add to any tickets we may have */
+ if (len)
+ space_info_add_new_bytes(fs_info, space_info,
+ len);
}
spin_unlock(&space_info->lock);
}
@@ -6528,7 +6750,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
break;
}
- if (btrfs_test_opt(root, DISCARD))
+ if (btrfs_test_opt(root->fs_info, DISCARD))
ret = btrfs_discard_extent(root, start,
end + 1 - start, NULL);
@@ -6666,7 +6888,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
NULL, refs_to_drop,
is_data, &last_ref);
if (ret) {
- btrfs_abort_transaction(trans, extent_root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
btrfs_release_path(path);
@@ -6715,7 +6937,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
path->nodes[0]);
}
if (ret < 0) {
- btrfs_abort_transaction(trans, extent_root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
extent_slot = path->slots[0];
@@ -6726,10 +6948,10 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
"unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu",
bytenr, parent, root_objectid, owner_objectid,
owner_offset);
- btrfs_abort_transaction(trans, extent_root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
} else {
- btrfs_abort_transaction(trans, extent_root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -6741,7 +6963,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
ret = convert_extent_item_v0(trans, extent_root, path,
owner_objectid, 0);
if (ret < 0) {
- btrfs_abort_transaction(trans, extent_root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -6760,7 +6982,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
btrfs_print_leaf(extent_root, path->nodes[0]);
}
if (ret < 0) {
- btrfs_abort_transaction(trans, extent_root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -6785,7 +7007,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
btrfs_err(info, "trying to drop %d refs but we only have %Lu "
"for bytenr %Lu", refs_to_drop, refs, bytenr);
ret = -EINVAL;
- btrfs_abort_transaction(trans, extent_root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
refs -= refs_to_drop;
@@ -6808,7 +7030,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
iref, refs_to_drop,
is_data, &last_ref);
if (ret) {
- btrfs_abort_transaction(trans, extent_root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
}
@@ -6831,7 +7053,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
num_to_del);
if (ret) {
- btrfs_abort_transaction(trans, extent_root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
btrfs_release_path(path);
@@ -6839,7 +7061,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
if (is_data) {
ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
if (ret) {
- btrfs_abort_transaction(trans, extent_root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
}
@@ -6847,13 +7069,13 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
ret = add_to_free_space_tree(trans, root->fs_info, bytenr,
num_bytes);
if (ret) {
- btrfs_abort_transaction(trans, extent_root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
ret = update_block_group(trans, root, bytenr, num_bytes, 0);
if (ret) {
- btrfs_abort_transaction(trans, extent_root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
}
@@ -6976,7 +7198,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
btrfs_add_free_space(cache, buf->start, buf->len);
- btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE, 0);
+ btrfs_free_reserved_bytes(cache, buf->len, 0);
btrfs_put_block_group(cache);
trace_btrfs_reserved_extent_free(root, buf->start, buf->len);
pin = 0;
@@ -7002,7 +7224,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root,
int ret;
struct btrfs_fs_info *fs_info = root->fs_info;
- if (btrfs_test_is_dummy_root(root))
+ if (btrfs_is_testing(fs_info))
return 0;
add_pinned_bytes(root->fs_info, num_bytes, owner, root_objectid);
@@ -7201,9 +7423,9 @@ btrfs_release_block_group(struct btrfs_block_group_cache *cache,
* the free space extent currently.
*/
static noinline int find_free_extent(struct btrfs_root *orig_root,
- u64 num_bytes, u64 empty_size,
- u64 hint_byte, struct btrfs_key *ins,
- u64 flags, int delalloc)
+ u64 ram_bytes, u64 num_bytes, u64 empty_size,
+ u64 hint_byte, struct btrfs_key *ins,
+ u64 flags, int delalloc)
{
int ret = 0;
struct btrfs_root *root = orig_root->fs_info->extent_root;
@@ -7215,8 +7437,6 @@ static noinline int find_free_extent(struct btrfs_root *orig_root,
struct btrfs_space_info *space_info;
int loop = 0;
int index = __get_raid_index(flags);
- int alloc_type = (flags & BTRFS_BLOCK_GROUP_DATA) ?
- RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC;
bool failed_cluster_refill = false;
bool failed_alloc = false;
bool use_cluster = true;
@@ -7548,8 +7768,8 @@ checks:
search_start - offset);
BUG_ON(offset > search_start);
- ret = btrfs_update_reserved_bytes(block_group, num_bytes,
- alloc_type, delalloc);
+ ret = btrfs_add_reserved_bytes(block_group, ram_bytes,
+ num_bytes, delalloc);
if (ret == -EAGAIN) {
btrfs_add_free_space(block_group, offset, num_bytes);
goto loop;
@@ -7637,8 +7857,7 @@ loop:
* can do more things.
*/
if (ret < 0 && ret != -ENOSPC)
- btrfs_abort_transaction(trans,
- root, ret);
+ btrfs_abort_transaction(trans, ret);
else
ret = 0;
if (!exist)
@@ -7692,8 +7911,8 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
printk(KERN_INFO "BTRFS: space_info %llu has %llu free, is %sfull\n",
info->flags,
info->total_bytes - info->bytes_used - info->bytes_pinned -
- info->bytes_reserved - info->bytes_readonly,
- (info->full) ? "" : "not ");
+ info->bytes_reserved - info->bytes_readonly -
+ info->bytes_may_use, (info->full) ? "" : "not ");
printk(KERN_INFO "BTRFS: space_info total=%llu, used=%llu, pinned=%llu, "
"reserved=%llu, may_use=%llu, readonly=%llu\n",
info->total_bytes, info->bytes_used, info->bytes_pinned,
@@ -7722,7 +7941,7 @@ again:
up_read(&info->groups_sem);
}
-int btrfs_reserve_extent(struct btrfs_root *root,
+int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
u64 num_bytes, u64 min_alloc_size,
u64 empty_size, u64 hint_byte,
struct btrfs_key *ins, int is_data, int delalloc)
@@ -7734,8 +7953,8 @@ int btrfs_reserve_extent(struct btrfs_root *root,
flags = btrfs_get_alloc_profile(root, is_data);
again:
WARN_ON(num_bytes < root->sectorsize);
- ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins,
- flags, delalloc);
+ ret = find_free_extent(root, ram_bytes, num_bytes, empty_size,
+ hint_byte, ins, flags, delalloc);
if (!ret && !is_data) {
btrfs_dec_block_group_reservations(root->fs_info,
ins->objectid);
@@ -7744,10 +7963,11 @@ again:
num_bytes = min(num_bytes >> 1, ins->offset);
num_bytes = round_down(num_bytes, root->sectorsize);
num_bytes = max(num_bytes, min_alloc_size);
+ ram_bytes = num_bytes;
if (num_bytes == min_alloc_size)
final_tried = true;
goto again;
- } else if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
+ } else if (btrfs_test_opt(root->fs_info, ENOSPC_DEBUG)) {
struct btrfs_space_info *sinfo;
sinfo = __find_space_info(root->fs_info, flags);
@@ -7778,16 +7998,14 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root,
if (pin)
pin_down_extent(root, cache, start, len, 1);
else {
- if (btrfs_test_opt(root, DISCARD))
+ if (btrfs_test_opt(root->fs_info, DISCARD))
ret = btrfs_discard_extent(root, start, len, NULL);
btrfs_add_free_space(cache, start, len);
- btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc);
+ btrfs_free_reserved_bytes(cache, len, delalloc);
+ trace_btrfs_reserved_extent_free(root, start, len);
}
btrfs_put_block_group(cache);
-
- trace_btrfs_reserved_extent_free(root, start, len);
-
return ret;
}
@@ -7996,6 +8214,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
{
int ret;
struct btrfs_block_group_cache *block_group;
+ struct btrfs_space_info *space_info;
/*
* Mixed block groups will exclude before processing the log so we only
@@ -8011,9 +8230,14 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
if (!block_group)
return -EINVAL;
- ret = btrfs_update_reserved_bytes(block_group, ins->offset,
- RESERVE_ALLOC_NO_ACCOUNT, 0);
- BUG_ON(ret); /* logic error */
+ space_info = block_group->space_info;
+ spin_lock(&space_info->lock);
+ spin_lock(&block_group->lock);
+ space_info->bytes_reserved += ins->offset;
+ block_group->reserved += ins->offset;
+ spin_unlock(&block_group->lock);
+ spin_unlock(&space_info->lock);
+
ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
0, owner, offset, ins, 1);
btrfs_put_block_group(block_group);
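
Log replay bypasses the normal reservation path, so btrfs_alloc_logged_file_extent() now bumps the counters by hand instead of calling the removed btrfs_update_reserved_bytes(); note the lock nesting, space_info outside block_group:

    /* Both counters must move together, under both locks. */
    spin_lock(&space_info->lock);
    spin_lock(&block_group->lock);
    space_info->bytes_reserved += ins->offset;
    block_group->reserved += ins->offset;
    spin_unlock(&block_group->lock);
    spin_unlock(&space_info->lock);
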
@@ -8088,7 +8312,7 @@ again:
goto again;
}
- if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
+ if (btrfs_test_opt(root->fs_info, ENOSPC_DEBUG)) {
static DEFINE_RATELIMIT_STATE(_rs,
DEFAULT_RATELIMIT_INTERVAL * 10,
/*DEFAULT_RATELIMIT_BURST*/ 1);
@@ -8142,19 +8366,21 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
SKINNY_METADATA);
- if (btrfs_test_is_dummy_root(root)) {
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+ if (btrfs_is_testing(root->fs_info)) {
buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr,
level);
if (!IS_ERR(buf))
root->alloc_bytenr += blocksize;
return buf;
}
+#endif
block_rsv = use_block_rsv(trans, root, blocksize);
if (IS_ERR(block_rsv))
return ERR_CAST(block_rsv);
- ret = btrfs_reserve_extent(root, blocksize, blocksize,
+ ret = btrfs_reserve_extent(root, blocksize, blocksize, blocksize,
empty_size, hint, &ins, 0, 0);
if (ret)
goto out_unuse;
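
btrfs_test_is_dummy_root() gives way to the fs_info-wide btrfs_is_testing(); here the whole branch is also wrapped in CONFIG_BTRFS_FS_RUN_SANITY_TESTS so production builds drop it, while unguarded call sites (the bit hooks later in this patch) rely on the helper collapsing to 0 without that config. A guess at the helper's shape; its definition is not part of this diff and the state-bit name is an assumption:

    static inline int btrfs_is_testing(struct btrfs_fs_info *fs_info)
    {
    #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
            /* BTRFS_FS_STATE_DUMMY_FS_INFO: assumed flag name */
            if (unlikely(test_bit(BTRFS_FS_STATE_DUMMY_FS_INFO,
                                  &fs_info->fs_state)))
                    return 1;
    #endif
            return 0;
    }
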
@@ -8307,34 +8533,6 @@ reada:
wc->reada_slot = slot;
}
-/*
- * These may not be seen by the usual inc/dec ref code so we have to
- * add them here.
- */
-static int record_one_subtree_extent(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 bytenr,
- u64 num_bytes)
-{
- struct btrfs_qgroup_extent_record *qrecord;
- struct btrfs_delayed_ref_root *delayed_refs;
-
- qrecord = kmalloc(sizeof(*qrecord), GFP_NOFS);
- if (!qrecord)
- return -ENOMEM;
-
- qrecord->bytenr = bytenr;
- qrecord->num_bytes = num_bytes;
- qrecord->old_roots = NULL;
-
- delayed_refs = &trans->transaction->delayed_refs;
- spin_lock(&delayed_refs->lock);
- if (btrfs_qgroup_insert_dirty_extent(delayed_refs, qrecord))
- kfree(qrecord);
- spin_unlock(&delayed_refs->lock);
-
- return 0;
-}
-
static int account_leaf_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *eb)
@@ -8368,7 +8566,8 @@ static int account_leaf_items(struct btrfs_trans_handle *trans,
num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
- ret = record_one_subtree_extent(trans, root, bytenr, num_bytes);
+ ret = btrfs_qgroup_insert_dirty_extent(trans, root->fs_info,
+ bytenr, num_bytes, GFP_NOFS);
if (ret)
return ret;
}
@@ -8517,8 +8716,9 @@ walk_down:
btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
path->locks[level] = BTRFS_READ_LOCK_BLOCKING;
- ret = record_one_subtree_extent(trans, root, child_bytenr,
- root->nodesize);
+ ret = btrfs_qgroup_insert_dirty_extent(trans,
+ root->fs_info, child_bytenr,
+ root->nodesize, GFP_NOFS);
if (ret)
goto out;
}
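
record_one_subtree_extent() is folded into btrfs_qgroup_insert_dirty_extent(), which now allocates the qgroup_extent_record and takes delayed_refs->lock itself; both call sites in this patch shrink to:

    ret = btrfs_qgroup_insert_dirty_extent(trans, root->fs_info,
                                           bytenr, num_bytes, GFP_NOFS);
    if (ret)
            return ret;

GFP_NOFS is kept because the callers hold tree locks that filesystem reclaim could deadlock against.
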
@@ -9113,7 +9313,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
&root->root_key,
root_item);
if (ret) {
- btrfs_abort_transaction(trans, tree_root, ret);
+ btrfs_abort_transaction(trans, ret);
err = ret;
goto out_end_trans;
}
@@ -9140,7 +9340,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
ret = btrfs_del_root(trans, tree_root, &root->root_key);
if (ret) {
- btrfs_abort_transaction(trans, tree_root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out_end_trans;
}
@@ -9148,7 +9348,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
ret = btrfs_find_root(tree_root, &root->root_key, path,
NULL, NULL);
if (ret < 0) {
- btrfs_abort_transaction(trans, tree_root, ret);
+ btrfs_abort_transaction(trans, ret);
err = ret;
goto out_end_trans;
} else if (ret > 0) {
@@ -9519,7 +9719,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
int full = 0;
int ret = 0;
- debug = btrfs_test_opt(root, ENOSPC_DEBUG);
+ debug = btrfs_test_opt(root->fs_info, ENOSPC_DEBUG);
block_group = btrfs_lookup_block_group(root->fs_info, bytenr);
@@ -9675,7 +9875,23 @@ static int find_first_block_group(struct btrfs_root *root,
if (found_key.objectid >= key->objectid &&
found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
- ret = 0;
+ struct extent_map_tree *em_tree;
+ struct extent_map *em;
+
+ em_tree = &root->fs_info->mapping_tree.map_tree;
+ read_lock(&em_tree->lock);
+ em = lookup_extent_mapping(em_tree, found_key.objectid,
+ found_key.offset);
+ read_unlock(&em_tree->lock);
+ if (!em) {
+ btrfs_err(root->fs_info,
+ "logical %llu len %llu found bg but no related chunk",
+ found_key.objectid, found_key.offset);
+ ret = -ENOENT;
+ } else {
+ ret = 0;
+ }
+ free_extent_map(em);
goto out;
}
path->slots[0]++;
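
find_first_block_group() now cross-checks every block-group item against the chunk mapping tree and fails with -ENOENT when no chunk backs it, instead of accepting a corrupt fs. The lookup idiom, condensed from the hunk above:

    struct extent_map_tree *em_tree = &root->fs_info->mapping_tree.map_tree;
    struct extent_map *em;

    read_lock(&em_tree->lock);
    em = lookup_extent_mapping(em_tree, found_key.objectid,
                               found_key.offset);
    read_unlock(&em_tree->lock);
    if (!em)
            return -ENOENT;     /* block group without a backing chunk */
    free_extent_map(em);        /* drop the reference the lookup took */
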
@@ -9712,6 +9928,7 @@ void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
block_group->iref = 0;
block_group->inode = NULL;
spin_unlock(&block_group->lock);
+ ASSERT(block_group->io_ctl.inode == NULL);
iput(inode);
last = block_group->key.objectid + block_group->key.offset;
btrfs_put_block_group(block_group);
@@ -9769,6 +9986,10 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
free_excluded_extents(info->extent_root, block_group);
btrfs_remove_free_space_cache(block_group);
+ ASSERT(list_empty(&block_group->dirty_list));
+ ASSERT(list_empty(&block_group->io_list));
+ ASSERT(list_empty(&block_group->bg_list));
+ ASSERT(atomic_read(&block_group->count) == 1);
btrfs_put_block_group(block_group);
spin_lock(&info->block_group_cache_lock);
@@ -9791,13 +10012,15 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
space_info = list_entry(info->space_info.next,
struct btrfs_space_info,
list);
- if (btrfs_test_opt(info->tree_root, ENOSPC_DEBUG)) {
- if (WARN_ON(space_info->bytes_pinned > 0 ||
+
+ /*
+ * Do not hide this behind enospc_debug; this is actually
+ * important and indicates a real bug if it happens.
+ */
+ if (WARN_ON(space_info->bytes_pinned > 0 ||
space_info->bytes_reserved > 0 ||
- space_info->bytes_may_use > 0)) {
- dump_space_info(space_info, 0, 0);
- }
- }
+ space_info->bytes_may_use > 0))
+ dump_space_info(space_info, 0, 0);
list_del(&space_info->list);
for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
struct kobject *kobj;
@@ -9915,10 +10138,10 @@ int btrfs_read_block_groups(struct btrfs_root *root)
path->reada = READA_FORWARD;
cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy);
- if (btrfs_test_opt(root, SPACE_CACHE) &&
+ if (btrfs_test_opt(root->fs_info, SPACE_CACHE) &&
btrfs_super_generation(root->fs_info->super_copy) != cache_gen)
need_clear = 1;
- if (btrfs_test_opt(root, CLEAR_CACHE))
+ if (btrfs_test_opt(root->fs_info, CLEAR_CACHE))
need_clear = 1;
while (1) {
@@ -9949,7 +10172,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
* b) Setting 'dirty flag' makes sure that we flush
* the new space cache info onto disk.
*/
- if (btrfs_test_opt(root, SPACE_CACHE))
+ if (btrfs_test_opt(root->fs_info, SPACE_CACHE))
cache->disk_cache_state = BTRFS_DC_CLEAR;
}
@@ -10005,9 +10228,10 @@ int btrfs_read_block_groups(struct btrfs_root *root)
goto error;
}
+ trace_btrfs_add_block_group(root->fs_info, cache, 0);
ret = update_space_info(info, cache->flags, found_key.offset,
btrfs_block_group_used(&cache->item),
- &space_info);
+ cache->bytes_super, &space_info);
if (ret) {
btrfs_remove_free_space_cache(cache);
spin_lock(&info->block_group_cache_lock);
@@ -10020,9 +10244,6 @@ int btrfs_read_block_groups(struct btrfs_root *root)
}
cache->space_info = space_info;
- spin_lock(&cache->space_info->lock);
- cache->space_info->bytes_readonly += cache->bytes_super;
- spin_unlock(&cache->space_info->lock);
__link_block_group(space_info, cache);
@@ -10093,11 +10314,11 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
ret = btrfs_insert_item(trans, extent_root, &key, &item,
sizeof(item));
if (ret)
- btrfs_abort_transaction(trans, extent_root, ret);
+ btrfs_abort_transaction(trans, ret);
ret = btrfs_finish_chunk_alloc(trans, extent_root,
key.objectid, key.offset);
if (ret)
- btrfs_abort_transaction(trans, extent_root, ret);
+ btrfs_abort_transaction(trans, ret);
add_block_group_free_space(trans, root->fs_info, block_group);
/* already aborted the transaction if it failed. */
next:
@@ -10114,7 +10335,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
int ret;
struct btrfs_root *extent_root;
struct btrfs_block_group_cache *cache;
-
extent_root = root->fs_info->extent_root;
btrfs_set_log_full_commit(root->fs_info, trans);
@@ -10160,7 +10380,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
* assigned to our block group, but don't update its counters just yet.
* We want our bg to be added to the rbtree with its ->space_info set.
*/
- ret = update_space_info(root->fs_info, cache->flags, 0, 0,
+ ret = update_space_info(root->fs_info, cache->flags, 0, 0, 0,
&cache->space_info);
if (ret) {
btrfs_remove_free_space_cache(cache);
@@ -10179,8 +10399,9 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
* Now that our block group has its ->space_info set and is inserted in
* the rbtree, update the space info's counters.
*/
+ trace_btrfs_add_block_group(root->fs_info, cache, 1);
ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
- &cache->space_info);
+ cache->bytes_super, &cache->space_info);
if (ret) {
btrfs_remove_free_space_cache(cache);
spin_lock(&root->fs_info->block_group_cache_lock);
@@ -10193,16 +10414,11 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
}
update_global_block_rsv(root->fs_info);
- spin_lock(&cache->space_info->lock);
- cache->space_info->bytes_readonly += cache->bytes_super;
- spin_unlock(&cache->space_info->lock);
-
__link_block_group(cache->space_info, cache);
list_add_tail(&cache->bg_list, &trans->new_bgs);
set_avail_alloc_bits(extent_root->fs_info, type);
-
return 0;
}
@@ -10415,7 +10631,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
spin_lock(&block_group->space_info->lock);
list_del_init(&block_group->ro_list);
- if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
+ if (btrfs_test_opt(root->fs_info, ENOSPC_DEBUG)) {
WARN_ON(block_group->space_info->total_bytes
< block_group->key.offset);
WARN_ON(block_group->space_info->bytes_readonly
@@ -10683,7 +10899,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
spin_unlock(&space_info->lock);
/* DISCARD can flip during remount */
- trimming = btrfs_test_opt(root, DISCARD);
+ trimming = btrfs_test_opt(root->fs_info, DISCARD);
/* Implicit trim during transaction commit. */
if (trimming)
@@ -10747,21 +10963,21 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
mixed = 1;
flags = BTRFS_BLOCK_GROUP_SYSTEM;
- ret = update_space_info(fs_info, flags, 0, 0, &space_info);
+ ret = update_space_info(fs_info, flags, 0, 0, 0, &space_info);
if (ret)
goto out;
if (mixed) {
flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA;
- ret = update_space_info(fs_info, flags, 0, 0, &space_info);
+ ret = update_space_info(fs_info, flags, 0, 0, 0, &space_info);
} else {
flags = BTRFS_BLOCK_GROUP_METADATA;
- ret = update_space_info(fs_info, flags, 0, 0, &space_info);
+ ret = update_space_info(fs_info, flags, 0, 0, 0, &space_info);
if (ret)
goto out;
flags = BTRFS_BLOCK_GROUP_DATA;
- ret = update_space_info(fs_info, flags, 0, 0, &space_info);
+ ret = update_space_info(fs_info, flags, 0, 0, 0, &space_info);
}
out:
return ret;
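
All update_space_info() callers now pass a bytes_super argument, folding the superblock-reserved bytes into bytes_readonly inside the helper's own locked section and retiring the two open-coded '+= cache->bytes_super' blocks removed above. A sketch of what the helper plausibly does with it; the body itself is outside this diff:

    /* Inferred from the call sites; illustrative only. */
    spin_lock(&found->lock);
    found->total_bytes += total_bytes;
    found->bytes_used += bytes_used;
    found->bytes_readonly += bytes_readonly;   /* the new argument */
    spin_unlock(&found->lock);
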
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 92fe3f801..44fe66b53 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -163,13 +163,13 @@ int __init extent_io_init(void)
{
extent_state_cache = kmem_cache_create("btrfs_extent_state",
sizeof(struct extent_state), 0,
- SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
+ SLAB_MEM_SPREAD, NULL);
if (!extent_state_cache)
return -ENOMEM;
extent_buffer_cache = kmem_cache_create("btrfs_extent_buffer",
sizeof(struct extent_buffer), 0,
- SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
+ SLAB_MEM_SPREAD, NULL);
if (!extent_buffer_cache)
goto free_state_cache;
@@ -2049,9 +2049,10 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
return -EIO;
}
bio->bi_bdev = dev->bdev;
+ bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_SYNC);
bio_add_page(bio, page, length, pg_offset);
- if (btrfsic_submit_bio_wait(WRITE_SYNC, bio)) {
+ if (btrfsic_submit_bio_wait(bio)) {
/* try to remap that extent elsewhere? */
btrfs_bio_counter_dec(fs_info);
bio_put(bio);
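
This is the 4.8 block-layer convention showing up throughout the patch: the operation and its flags ride on the bio itself, set once with bio_set_op_attrs(), and the submit helpers lose their rw argument. Before and after, from this hunk:

    /* before: direction supplied at submission time */
    ret = btrfsic_submit_bio_wait(WRITE_SYNC, bio);

    /* after: op and flags stored on the bio first */
    bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_SYNC);
    ret = btrfsic_submit_bio_wait(bio);
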
@@ -2386,7 +2387,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
int read_mode;
int ret;
- BUG_ON(failed_bio->bi_rw & REQ_WRITE);
+ BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
ret = btrfs_get_io_failure_record(inode, start, end, &failrec);
if (ret)
@@ -2412,12 +2413,12 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
free_io_failure(inode, failrec);
return -EIO;
}
+ bio_set_op_attrs(bio, REQ_OP_READ, read_mode);
pr_debug("Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d\n",
read_mode, failrec->this_mirror, failrec->in_validation);
- ret = tree->ops->submit_bio_hook(inode, read_mode, bio,
- failrec->this_mirror,
+ ret = tree->ops->submit_bio_hook(inode, bio, failrec->this_mirror,
failrec->bio_flags, 0);
if (ret) {
free_io_failure(inode, failrec);
@@ -2717,8 +2718,8 @@ struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
}
-static int __must_check submit_one_bio(int rw, struct bio *bio,
- int mirror_num, unsigned long bio_flags)
+static int __must_check submit_one_bio(struct bio *bio, int mirror_num,
+ unsigned long bio_flags)
{
int ret = 0;
struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
@@ -2729,33 +2730,31 @@ static int __must_check submit_one_bio(int rw, struct bio *bio,
start = page_offset(page) + bvec->bv_offset;
bio->bi_private = NULL;
-
bio_get(bio);
if (tree->ops && tree->ops->submit_bio_hook)
- ret = tree->ops->submit_bio_hook(page->mapping->host, rw, bio,
+ ret = tree->ops->submit_bio_hook(page->mapping->host, bio,
mirror_num, bio_flags, start);
else
- btrfsic_submit_bio(rw, bio);
+ btrfsic_submit_bio(bio);
bio_put(bio);
return ret;
}
-static int merge_bio(int rw, struct extent_io_tree *tree, struct page *page,
+static int merge_bio(struct extent_io_tree *tree, struct page *page,
unsigned long offset, size_t size, struct bio *bio,
unsigned long bio_flags)
{
int ret = 0;
if (tree->ops && tree->ops->merge_bio_hook)
- ret = tree->ops->merge_bio_hook(rw, page, offset, size, bio,
+ ret = tree->ops->merge_bio_hook(page, offset, size, bio,
bio_flags);
- BUG_ON(ret < 0);
return ret;
}
-static int submit_extent_page(int rw, struct extent_io_tree *tree,
+static int submit_extent_page(int op, int op_flags, struct extent_io_tree *tree,
struct writeback_control *wbc,
struct page *page, sector_t sector,
size_t size, unsigned long offset,
@@ -2783,10 +2782,9 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
if (prev_bio_flags != bio_flags || !contig ||
force_bio_submit ||
- merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) ||
+ merge_bio(tree, page, offset, page_size, bio, bio_flags) ||
bio_add_page(bio, page, page_size, offset) < page_size) {
- ret = submit_one_bio(rw, bio, mirror_num,
- prev_bio_flags);
+ ret = submit_one_bio(bio, mirror_num, prev_bio_flags);
if (ret < 0) {
*bio_ret = NULL;
return ret;
@@ -2807,6 +2805,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
bio_add_page(bio, page, page_size, offset);
bio->bi_end_io = end_io_func;
bio->bi_private = tree;
+ bio_set_op_attrs(bio, op, op_flags);
if (wbc) {
wbc_init_bio(wbc, bio);
wbc_account_io(wbc, page, page_size);
@@ -2815,7 +2814,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
if (bio_ret)
*bio_ret = bio;
else
- ret = submit_one_bio(rw, bio, mirror_num, bio_flags);
+ ret = submit_one_bio(bio, mirror_num, bio_flags);
return ret;
}
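
submit_extent_page() adopts the same split, taking an explicit op plus op_flags pair and stamping them on any bio it allocates via bio_set_op_attrs(). The metadata write-out call site later in this patch shows the shape, with write_flags built as (epd->sync_io ? WRITE_SYNC : 0) | REQ_META:

    ret = submit_extent_page(REQ_OP_WRITE, write_flags, tree, wbc,
                             p, offset >> 9, PAGE_SIZE, 0, bdev,
                             &epd->bio, -1,
                             end_bio_extent_buffer_writepage,
                             0, epd->bio_flags, bio_flags, false);
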
@@ -2873,13 +2872,14 @@ __get_extent_map(struct inode *inode, struct page *page, size_t pg_offset,
* into the tree that are removed when the IO is done (by the end_io
* handlers)
* XXX JDM: This needs looking at to ensure proper page locking
+ * return 0 on success, otherwise an error code is returned
*/
static int __do_readpage(struct extent_io_tree *tree,
struct page *page,
get_extent_t *get_extent,
struct extent_map **em_cached,
struct bio **bio, int mirror_num,
- unsigned long *bio_flags, int rw,
+ unsigned long *bio_flags, int read_flags,
u64 *prev_em_start)
{
struct inode *inode = page->mapping->host;
@@ -2894,7 +2894,7 @@ static int __do_readpage(struct extent_io_tree *tree,
sector_t sector;
struct extent_map *em;
struct block_device *bdev;
- int ret;
+ int ret = 0;
int nr = 0;
size_t pg_offset = 0;
size_t iosize;
@@ -3062,8 +3062,8 @@ static int __do_readpage(struct extent_io_tree *tree,
}
pnr -= page->index;
- ret = submit_extent_page(rw, tree, NULL, page,
- sector, disk_io_size, pg_offset,
+ ret = submit_extent_page(REQ_OP_READ, read_flags, tree, NULL,
+ page, sector, disk_io_size, pg_offset,
bdev, bio, pnr,
end_bio_extent_readpage, mirror_num,
*bio_flags,
@@ -3075,6 +3075,7 @@ static int __do_readpage(struct extent_io_tree *tree,
} else {
SetPageError(page);
unlock_extent(tree, cur, cur + iosize - 1);
+ goto out;
}
cur = cur + iosize;
pg_offset += iosize;
@@ -3085,7 +3086,7 @@ out:
SetPageUptodate(page);
unlock_page(page);
}
- return 0;
+ return ret;
}
static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
@@ -3094,7 +3095,7 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
get_extent_t *get_extent,
struct extent_map **em_cached,
struct bio **bio, int mirror_num,
- unsigned long *bio_flags, int rw,
+ unsigned long *bio_flags,
u64 *prev_em_start)
{
struct inode *inode;
@@ -3115,7 +3116,7 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
for (index = 0; index < nr_pages; index++) {
__do_readpage(tree, pages[index], get_extent, em_cached, bio,
- mirror_num, bio_flags, rw, prev_em_start);
+ mirror_num, bio_flags, 0, prev_em_start);
put_page(pages[index]);
}
}
@@ -3125,7 +3126,7 @@ static void __extent_readpages(struct extent_io_tree *tree,
int nr_pages, get_extent_t *get_extent,
struct extent_map **em_cached,
struct bio **bio, int mirror_num,
- unsigned long *bio_flags, int rw,
+ unsigned long *bio_flags,
u64 *prev_em_start)
{
u64 start = 0;
@@ -3147,7 +3148,7 @@ static void __extent_readpages(struct extent_io_tree *tree,
index - first_index, start,
end, get_extent, em_cached,
bio, mirror_num, bio_flags,
- rw, prev_em_start);
+ prev_em_start);
start = page_start;
end = start + PAGE_SIZE - 1;
first_index = index;
@@ -3158,7 +3159,7 @@ static void __extent_readpages(struct extent_io_tree *tree,
__do_contiguous_readpages(tree, &pages[first_index],
index - first_index, start,
end, get_extent, em_cached, bio,
- mirror_num, bio_flags, rw,
+ mirror_num, bio_flags,
prev_em_start);
}
@@ -3166,7 +3167,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
struct page *page,
get_extent_t *get_extent,
struct bio **bio, int mirror_num,
- unsigned long *bio_flags, int rw)
+ unsigned long *bio_flags, int read_flags)
{
struct inode *inode = page->mapping->host;
struct btrfs_ordered_extent *ordered;
@@ -3186,7 +3187,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
}
ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num,
- bio_flags, rw, NULL);
+ bio_flags, read_flags, NULL);
return ret;
}
@@ -3198,9 +3199,9 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
int ret;
ret = __extent_read_full_page(tree, page, get_extent, &bio, mirror_num,
- &bio_flags, READ);
+ &bio_flags, 0);
if (bio)
- ret = submit_one_bio(READ, bio, mirror_num, bio_flags);
+ ret = submit_one_bio(bio, mirror_num, bio_flags);
return ret;
}
@@ -3434,8 +3435,8 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
page->index, cur, end);
}
- ret = submit_extent_page(write_flags, tree, wbc, page,
- sector, iosize, pg_offset,
+ ret = submit_extent_page(REQ_OP_WRITE, write_flags, tree, wbc,
+ page, sector, iosize, pg_offset,
bdev, &epd->bio, max_nr,
end_bio_extent_writepage,
0, 0, 0, false);
@@ -3474,13 +3475,11 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
size_t pg_offset = 0;
loff_t i_size = i_size_read(inode);
unsigned long end_index = i_size >> PAGE_SHIFT;
- int write_flags;
+ int write_flags = 0;
unsigned long nr_written = 0;
if (wbc->sync_mode == WB_SYNC_ALL)
write_flags = WRITE_SYNC;
- else
- write_flags = WRITE;
trace___extent_writepage(page, inode, wbc);
@@ -3724,7 +3723,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
u64 offset = eb->start;
unsigned long i, num_pages;
unsigned long bio_flags = 0;
- int rw = (epd->sync_io ? WRITE_SYNC : WRITE) | REQ_META;
+ int write_flags = (epd->sync_io ? WRITE_SYNC : 0) | REQ_META;
int ret = 0;
clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags);
@@ -3738,9 +3737,10 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
clear_page_dirty_for_io(p);
set_page_writeback(p);
- ret = submit_extent_page(rw, tree, wbc, p, offset >> 9,
- PAGE_SIZE, 0, bdev, &epd->bio,
- -1, end_bio_extent_buffer_writepage,
+ ret = submit_extent_page(REQ_OP_WRITE, write_flags, tree, wbc,
+ p, offset >> 9, PAGE_SIZE, 0, bdev,
+ &epd->bio, -1,
+ end_bio_extent_buffer_writepage,
0, epd->bio_flags, bio_flags, false);
epd->bio_flags = bio_flags;
if (ret) {
@@ -4050,13 +4050,12 @@ retry:
static void flush_epd_write_bio(struct extent_page_data *epd)
{
if (epd->bio) {
- int rw = WRITE;
int ret;
- if (epd->sync_io)
- rw = WRITE_SYNC;
+ bio_set_op_attrs(epd->bio, REQ_OP_WRITE,
+ epd->sync_io ? WRITE_SYNC : 0);
- ret = submit_one_bio(rw, epd->bio, 0, epd->bio_flags);
+ ret = submit_one_bio(epd->bio, 0, epd->bio_flags);
BUG_ON(ret < 0); /* -ENOMEM */
epd->bio = NULL;
}
@@ -4174,7 +4173,8 @@ int extent_readpages(struct extent_io_tree *tree,
prefetchw(&page->flags);
list_del(&page->lru);
if (add_to_page_cache_lru(page, mapping,
- page->index, GFP_NOFS)) {
+ page->index,
+ readahead_gfp_mask(mapping))) {
put_page(page);
continue;
}
@@ -4183,19 +4183,19 @@ int extent_readpages(struct extent_io_tree *tree,
if (nr < ARRAY_SIZE(pagepool))
continue;
__extent_readpages(tree, pagepool, nr, get_extent, &em_cached,
- &bio, 0, &bio_flags, READ, &prev_em_start);
+ &bio, 0, &bio_flags, &prev_em_start);
nr = 0;
}
if (nr)
__extent_readpages(tree, pagepool, nr, get_extent, &em_cached,
- &bio, 0, &bio_flags, READ, &prev_em_start);
+ &bio, 0, &bio_flags, &prev_em_start);
if (em_cached)
free_extent_map(em_cached);
BUG_ON(!list_empty(pages));
if (bio)
- return submit_one_bio(READ, bio, 0, bio_flags);
+ return submit_one_bio(bio, 0, bio_flags);
return 0;
}
@@ -5225,22 +5225,38 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
atomic_set(&eb->io_pages, num_reads);
for (i = start_i; i < num_pages; i++) {
page = eb->pages[i];
+
if (!PageUptodate(page)) {
+ if (ret) {
+ atomic_dec(&eb->io_pages);
+ unlock_page(page);
+ continue;
+ }
+
ClearPageError(page);
err = __extent_read_full_page(tree, page,
get_extent, &bio,
mirror_num, &bio_flags,
- READ | REQ_META);
- if (err)
+ REQ_META);
+ if (err) {
ret = err;
+ /*
+ * We used &bio in __extent_read_full_page above, so if
+ * it returned an error, the current page was never added
+ * to the bio and has already been unlocked.
+ *
+ * We must decrement io_pages ourselves.
+ */
+ atomic_dec(&eb->io_pages);
+ }
} else {
unlock_page(page);
}
}
if (bio) {
- err = submit_one_bio(READ | REQ_META, bio, mirror_num,
- bio_flags);
+ err = submit_one_bio(bio, mirror_num, bio_flags);
if (err)
return err;
}
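
The hunk above keeps eb->io_pages balanced on the error path: once one submission fails, every remaining page is skipped but must still be unlocked and subtracted from io_pages, and a page whose own submission failed never joined the bio and must be subtracted too. The bookkeeping in outline:

    atomic_set(&eb->io_pages, num_reads);
    for (i = start_i; i < num_pages; i++) {
            page = eb->pages[i];
            if (PageUptodate(page)) {
                    unlock_page(page);
                    continue;
            }
            if (ret) {                      /* earlier submit failed */
                    atomic_dec(&eb->io_pages);
                    unlock_page(page);
                    continue;
            }
            err = __extent_read_full_page(tree, page, get_extent, &bio,
                                          mirror_num, &bio_flags,
                                          REQ_META);
            if (err) {
                    ret = err;
                    /* page is already unlocked, just rebalance */
                    atomic_dec(&eb->io_pages);
            }
    }
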
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index c0c1c4fef..28cd88fcc 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -20,6 +20,7 @@
#define EXTENT_DAMAGED (1U << 14)
#define EXTENT_NORESERVE (1U << 15)
#define EXTENT_QGROUP_RESERVED (1U << 16)
+#define EXTENT_CLEAR_DATA_RESV (1U << 17)
#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC)
@@ -63,16 +64,16 @@ struct btrfs_root;
struct btrfs_io_bio;
struct io_failure_record;
-typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw,
- struct bio *bio, int mirror_num,
- unsigned long bio_flags, u64 bio_offset);
+typedef int (extent_submit_bio_hook_t)(struct inode *inode, struct bio *bio,
+ int mirror_num, unsigned long bio_flags,
+ u64 bio_offset);
struct extent_io_ops {
int (*fill_delalloc)(struct inode *inode, struct page *locked_page,
u64 start, u64 end, int *page_started,
unsigned long *nr_written);
int (*writepage_start_hook)(struct page *page, u64 start, u64 end);
extent_submit_bio_hook_t *submit_bio_hook;
- int (*merge_bio_hook)(int rw, struct page *page, unsigned long offset,
+ int (*merge_bio_hook)(struct page *page, unsigned long offset,
size_t size, struct bio *bio,
unsigned long bio_flags);
int (*readpage_io_failed_hook)(struct page *page, int failed_mirror);
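
extent_submit_bio_hook_t drops its rw parameter, so hook implementations derive the direction from the bio. A minimal hypothetical hook; my_csum_write() and my_plain_read() are invented stand-ins for whatever the hook actually does:

    static int my_submit_bio_hook(struct inode *inode, struct bio *bio,
                                  int mirror_num, unsigned long bio_flags,
                                  u64 bio_offset)
    {
            if (bio_op(bio) == REQ_OP_WRITE)
                    return my_csum_write(inode, bio, mirror_num);
            return my_plain_read(inode, bio, mirror_num);
    }
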
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index e0715fcfb..26f9ac719 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -13,7 +13,7 @@ int __init extent_map_init(void)
{
extent_map_cache = kmem_cache_create("btrfs_extent_map",
sizeof(struct extent_map), 0,
- SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
+ SLAB_MEM_SPREAD, NULL);
if (!extent_map_cache)
return -ENOMEM;
return 0;
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 62a81ee13..d0d571c47 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -250,7 +250,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
offset + root->sectorsize - 1,
EXTENT_NODATASUM);
} else {
- btrfs_info(BTRFS_I(inode)->root->fs_info,
+ btrfs_info_rl(BTRFS_I(inode)->root->fs_info,
"no csum found for inode %llu start %llu",
btrfs_ino(inode), offset);
}
@@ -699,7 +699,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
*/
ret = btrfs_split_item(trans, root, path, &key, offset);
if (ret && ret != -EAGAIN) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index b56887b35..fea31a4a6 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -132,7 +132,7 @@ static int __btrfs_add_inode_defrag(struct inode *inode,
static inline int __need_auto_defrag(struct btrfs_root *root)
{
- if (!btrfs_test_opt(root, AUTO_DEFRAG))
+ if (!btrfs_test_opt(root->fs_info, AUTO_DEFRAG))
return 0;
if (btrfs_fs_closing(root->fs_info))
@@ -950,7 +950,7 @@ delete_extent_item:
ret = btrfs_del_items(trans, root, path, del_slot,
del_nr);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
break;
}
@@ -974,7 +974,7 @@ delete_extent_item:
path->slots[0] = del_slot;
ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
if (ret)
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
}
leaf = path->nodes[0];
@@ -1190,7 +1190,7 @@ again:
goto again;
}
if (ret < 0) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -1278,7 +1278,7 @@ again:
ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
if (ret < 0) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
}
@@ -2033,6 +2033,14 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
*/
clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
&BTRFS_I(inode)->runtime_flags);
+ /*
+ * An ordered extent might have started before and completed
+ * already with io errors, in which case the inode was not
+ * updated and we end up here. So check the inode's mapping
+ * flags for any errors that might have happened while doing
+ * writeback of file data.
+ */
+ ret = btrfs_inode_check_errors(inode);
inode_unlock(inode);
goto out;
}
@@ -2062,7 +2070,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
}
trans->sync = true;
- btrfs_init_log_ctx(&ctx);
+ btrfs_init_log_ctx(&ctx, inode);
ret = btrfs_log_dentry_safe(trans, root, dentry, start, end, &ctx);
if (ret < 0) {
@@ -2477,7 +2485,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
}
ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv,
- min_size);
+ min_size, 0);
BUG_ON(ret);
trans->block_rsv = rsv;
@@ -2520,7 +2528,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
}
ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv,
- rsv, min_size);
+ rsv, min_size, 0);
BUG_ON(ret); /* shouldn't happen */
trans->block_rsv = rsv;
@@ -2667,6 +2675,7 @@ static long btrfs_fallocate(struct file *file, int mode,
alloc_start = round_down(offset, blocksize);
alloc_end = round_up(offset + len, blocksize);
+ cur_offset = alloc_start;
/* Make sure we aren't being given some crap mode */
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
@@ -2759,7 +2768,6 @@ static long btrfs_fallocate(struct file *file, int mode,
/* First, check if we exceed the qgroup limit */
INIT_LIST_HEAD(&reserve_list);
- cur_offset = alloc_start;
while (1) {
em = btrfs_get_extent(inode, NULL, 0, cur_offset,
alloc_end - cur_offset, 0);
@@ -2786,6 +2794,14 @@ static long btrfs_fallocate(struct file *file, int mode,
last_byte - cur_offset);
if (ret < 0)
break;
+ } else {
+ /*
+ * No need to reserve an unwritten extent for this
+ * range; free the reserved data space first, otherwise
+ * it will result in a false ENOSPC error.
+ */
+ btrfs_free_reserved_data_space(inode, cur_offset,
+ last_byte - cur_offset);
}
free_extent_map(em);
cur_offset = last_byte;
@@ -2803,6 +2819,9 @@ static long btrfs_fallocate(struct file *file, int mode,
range->start,
range->len, 1 << inode->i_blkbits,
offset + len, &alloc_hint);
+ else
+ btrfs_free_reserved_data_space(inode, range->start,
+ range->len);
list_del(&range->list);
kfree(range);
}
@@ -2837,18 +2856,11 @@ out_unlock:
unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
&cached_state, GFP_KERNEL);
out:
- /*
- * As we waited the extent range, the data_rsv_map must be empty
- * in the range, as written data range will be released from it.
- * And for prealloacted extent, it will also be released when
- * its metadata is written.
- * So this is completely used as cleanup.
- */
- btrfs_qgroup_free_data(inode, alloc_start, alloc_end - alloc_start);
inode_unlock(inode);
/* Let go of our reservation. */
- btrfs_free_reserved_data_space(inode, alloc_start,
- alloc_end - alloc_start);
+ if (ret != 0)
+ btrfs_free_reserved_data_space(inode, alloc_start,
+ alloc_end - cur_offset);
return ret;
}
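
The fallocate bookkeeping is rebalanced: cur_offset is initialized before any early exit, ranges already backed by an extent hand their data-space reservation back immediately (and again from the cleanup list when preallocation is skipped), and the final release only runs on error, covering alloc_end - cur_offset bytes. The scan loop in outline; range_needs_prealloc() and next_extent_boundary() are hypothetical stand-ins for the btrfs_get_extent() hole test:

    cur_offset = alloc_start;              /* set before any early exit */
    while (cur_offset < alloc_end) {
            u64 last_byte = next_extent_boundary(cur_offset);

            if (range_needs_prealloc(cur_offset, last_byte)) {
                    ret = add_falloc_range(&reserve_list, cur_offset,
                                           last_byte - cur_offset);
                    if (ret < 0)
                            break;
            } else {
                    /*
                     * Already allocated: return this range's reservation
                     * now, or it surfaces later as a false ENOSPC.
                     */
                    btrfs_free_reserved_data_space(inode, cur_offset,
                                                   last_byte - cur_offset);
            }
            cur_offset = last_byte;
    }
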
@@ -2975,7 +2987,7 @@ int btrfs_auto_defrag_init(void)
{
btrfs_inode_defrag_cachep = kmem_cache_create("btrfs_inode_defrag",
sizeof(struct inode_defrag), 0,
- SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
+ SLAB_MEM_SPREAD,
NULL);
if (!btrfs_inode_defrag_cachep)
return -ENOMEM;
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 69d270f66..d571bd2b6 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -280,7 +280,7 @@ fail:
if (locked)
mutex_unlock(&trans->transaction->cache_write_mutex);
if (ret)
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
return ret;
}
@@ -3026,7 +3026,7 @@ int btrfs_find_space_cluster(struct btrfs_root *root,
* For metadata, allow allocates with smaller extents. For
* data, keep it dense.
*/
- if (btrfs_test_opt(root, SSD_SPREAD)) {
+ if (btrfs_test_opt(root->fs_info, SSD_SPREAD)) {
cont1_bytes = min_bytes = bytes + empty_size;
} else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
cont1_bytes = bytes;
@@ -3470,7 +3470,7 @@ int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
int ret = 0;
u64 root_gen = btrfs_root_generation(&root->root_item);
- if (!btrfs_test_opt(root, INODE_MAP_CACHE))
+ if (!btrfs_test_opt(root->fs_info, INODE_MAP_CACHE))
return 0;
/*
@@ -3514,7 +3514,7 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
struct btrfs_io_ctl io_ctl;
bool release_metadata = true;
- if (!btrfs_test_opt(root, INODE_MAP_CACHE))
+ if (!btrfs_test_opt(root->fs_info, INODE_MAP_CACHE))
return 0;
memset(&io_ctl, 0, sizeof(io_ctl));
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index 53dbeaf6c..87e7e3d3e 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -305,7 +305,7 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
out:
kvfree(bitmap);
if (ret)
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
return ret;
}
@@ -454,7 +454,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
out:
kvfree(bitmap);
if (ret)
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
return ret;
}
@@ -851,7 +851,7 @@ int remove_from_free_space_tree(struct btrfs_trans_handle *trans,
out:
btrfs_free_path(path);
if (ret)
- btrfs_abort_transaction(trans, fs_info->free_space_root, ret);
+ btrfs_abort_transaction(trans, ret);
return ret;
}
@@ -1047,7 +1047,7 @@ int add_to_free_space_tree(struct btrfs_trans_handle *trans,
out:
btrfs_free_path(path);
if (ret)
- btrfs_abort_transaction(trans, fs_info->free_space_root, ret);
+ btrfs_abort_transaction(trans, ret);
return ret;
}
@@ -1193,7 +1193,7 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
abort:
fs_info->creating_free_space_tree = 0;
- btrfs_abort_transaction(trans, tree_root, ret);
+ btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans, tree_root);
return ret;
}
@@ -1280,7 +1280,7 @@ int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info)
return 0;
abort:
- btrfs_abort_transaction(trans, tree_root, ret);
+ btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans, tree_root);
return ret;
}
@@ -1333,7 +1333,7 @@ out:
btrfs_free_path(path);
mutex_unlock(&block_group->free_space_lock);
if (ret)
- btrfs_abort_transaction(trans, fs_info->free_space_root, ret);
+ btrfs_abort_transaction(trans, ret);
return ret;
}
@@ -1410,7 +1410,7 @@ int remove_block_group_free_space(struct btrfs_trans_handle *trans,
out:
btrfs_free_path(path);
if (ret)
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
return ret;
}
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index 70107f7c9..359ee861b 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -38,7 +38,7 @@ static int caching_kthread(void *data)
int slot;
int ret;
- if (!btrfs_test_opt(root, INODE_MAP_CACHE))
+ if (!btrfs_test_opt(root->fs_info, INODE_MAP_CACHE))
return 0;
path = btrfs_alloc_path();
@@ -141,7 +141,7 @@ static void start_caching(struct btrfs_root *root)
int ret;
u64 objectid;
- if (!btrfs_test_opt(root, INODE_MAP_CACHE))
+ if (!btrfs_test_opt(root->fs_info, INODE_MAP_CACHE))
return;
spin_lock(&root->ino_cache_lock);
@@ -185,7 +185,7 @@ static void start_caching(struct btrfs_root *root)
int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid)
{
- if (!btrfs_test_opt(root, INODE_MAP_CACHE))
+ if (!btrfs_test_opt(root->fs_info, INODE_MAP_CACHE))
return btrfs_find_free_objectid(root, objectid);
again:
@@ -211,7 +211,7 @@ void btrfs_return_ino(struct btrfs_root *root, u64 objectid)
{
struct btrfs_free_space_ctl *pinned = root->free_ino_pinned;
- if (!btrfs_test_opt(root, INODE_MAP_CACHE))
+ if (!btrfs_test_opt(root->fs_info, INODE_MAP_CACHE))
return;
again:
if (root->ino_cache_state == BTRFS_CACHE_FINISHED) {
@@ -251,7 +251,7 @@ void btrfs_unpin_free_ino(struct btrfs_root *root)
struct rb_node *n;
u64 count;
- if (!btrfs_test_opt(root, INODE_MAP_CACHE))
+ if (!btrfs_test_opt(root->fs_info, INODE_MAP_CACHE))
return;
while (1) {
@@ -412,7 +412,7 @@ int btrfs_save_ino_cache(struct btrfs_root *root,
if (btrfs_root_refs(&root->root_item) == 0)
return 0;
- if (!btrfs_test_opt(root, INODE_MAP_CACHE))
+ if (!btrfs_test_opt(root->fs_info, INODE_MAP_CACHE))
return 0;
path = btrfs_alloc_path();
@@ -458,7 +458,7 @@ again:
BTRFS_I(inode)->generation = 0;
ret = btrfs_update_inode(trans, root, inode);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out_put;
}
@@ -466,7 +466,7 @@ again:
ret = btrfs_truncate_free_space_cache(root, trans, NULL, inode);
if (ret) {
if (ret != -ENOSPC)
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out_put;
}
}
@@ -495,10 +495,9 @@ again:
ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc,
prealloc, prealloc, &alloc_hint);
if (ret) {
- btrfs_delalloc_release_space(inode, 0, prealloc);
+ btrfs_delalloc_release_metadata(inode, prealloc);
goto out_put;
}
- btrfs_free_reserved_data_space(inode, 0, prealloc);
ret = btrfs_write_out_ino_cache(root, trans, path, inode);
out_put:
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 442195472..e6811c42e 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -60,6 +60,7 @@
#include "hash.h"
#include "props.h"
#include "qgroup.h"
+#include "dedupe.h"
struct btrfs_iget_args {
struct btrfs_key *location;
@@ -105,8 +106,9 @@ static int btrfs_truncate(struct inode *inode);
static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent);
static noinline int cow_file_range(struct inode *inode,
struct page *locked_page,
- u64 start, u64 end, int *page_started,
- unsigned long *nr_written, int unlock);
+ u64 start, u64 end, u64 delalloc_end,
+ int *page_started, unsigned long *nr_written,
+ int unlock, struct btrfs_dedupe_hash *hash);
static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
u64 len, u64 orig_start,
u64 block_start, u64 block_len,
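
cow_file_range() grows a delalloc_end and a struct btrfs_dedupe_hash pointer (fs/btrfs/dedupe.h is new in this patch); every caller updated below passes the range end twice and a NULL hash, so behavior is unchanged while an in-band dedupe hook is threaded through. Typical call site:

    ret = cow_file_range(inode, locked_page, start, end, end,
                         &page_started, &nr_written, 1, NULL);
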
@@ -294,7 +296,7 @@ static noinline int cow_file_range_inline(struct btrfs_root *root,
start, aligned_end, NULL,
1, 1, extent_item_size, &extent_inserted);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -305,7 +307,7 @@ static noinline int cow_file_range_inline(struct btrfs_root *root,
inline_len, compressed_size,
compress_type, compressed_pages);
if (ret && ret != -ENOSPC) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
} else if (ret == -ENOSPC) {
ret = 1;
@@ -374,12 +376,12 @@ static inline int inode_need_compress(struct inode *inode)
struct btrfs_root *root = BTRFS_I(inode)->root;
/* force compress */
- if (btrfs_test_opt(root, FORCE_COMPRESS))
+ if (btrfs_test_opt(root->fs_info, FORCE_COMPRESS))
return 1;
/* bad compression ratios */
if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS)
return 0;
- if (btrfs_test_opt(root, COMPRESS) ||
+ if (btrfs_test_opt(root->fs_info, COMPRESS) ||
BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS ||
BTRFS_I(inode)->force_compress)
return 1;
@@ -564,6 +566,8 @@ cont:
PAGE_SET_WRITEBACK |
page_error_op |
PAGE_END_WRITEBACK);
+ btrfs_free_reserved_data_space_noquota(inode, start,
+ end - start + 1);
goto free_pages_out;
}
}
@@ -585,9 +589,27 @@ cont:
will_compress = 0;
} else {
num_bytes = total_in;
+ *num_added += 1;
+
+ /*
+ * The async work queues will take care of doing actual
+ * allocation on disk for these compressed pages, and
+ * will submit them to the elevator.
+ */
+ add_async_extent(async_cow, start, num_bytes,
+ total_compressed, pages, nr_pages_ret,
+ compress_type);
+
+ if (start + num_bytes < end) {
+ start += num_bytes;
+ pages = NULL;
+ cond_resched();
+ goto again;
+ }
+ return;
}
}
- if (!will_compress && pages) {
+ if (pages) {
/*
* the compression code ran but failed to make things smaller,
* free any pages it allocated and our page pointer array
@@ -602,48 +624,28 @@ cont:
nr_pages_ret = 0;
/* flag the file so we don't compress in the future */
- if (!btrfs_test_opt(root, FORCE_COMPRESS) &&
+ if (!btrfs_test_opt(root->fs_info, FORCE_COMPRESS) &&
!(BTRFS_I(inode)->force_compress)) {
BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
}
}
- if (will_compress) {
- *num_added += 1;
-
- /* the async work queues will take care of doing actual
- * allocation on disk for these compressed pages,
- * and will submit them to the elevator.
- */
- add_async_extent(async_cow, start, num_bytes,
- total_compressed, pages, nr_pages_ret,
- compress_type);
-
- if (start + num_bytes < end) {
- start += num_bytes;
- pages = NULL;
- cond_resched();
- goto again;
- }
- } else {
cleanup_and_bail_uncompressed:
- /*
- * No compression, but we still need to write the pages in
- * the file we've been given so far. redirty the locked
- * page if it corresponds to our extent and set things up
- * for the async work queue to run cow_file_range to do
- * the normal delalloc dance
- */
- if (page_offset(locked_page) >= start &&
- page_offset(locked_page) <= end) {
- __set_page_dirty_nobuffers(locked_page);
- /* unlocked later on in the async handlers */
- }
- if (redirty)
- extent_range_redirty_for_io(inode, start, end);
- add_async_extent(async_cow, start, end - start + 1,
- 0, NULL, 0, BTRFS_COMPRESS_NONE);
- *num_added += 1;
- }
+ /*
+ * No compression, but we still need to write the pages in the file
+ * we've been given so far. Redirty the locked page if it corresponds
+ * to our extent and set things up for the async work queue to run
+ * cow_file_range to do the normal delalloc dance.
+ */
+ if (page_offset(locked_page) >= start &&
+ page_offset(locked_page) <= end)
+ __set_page_dirty_nobuffers(locked_page);
+ /* unlocked later on in the async handlers */
+
+ if (redirty)
+ extent_range_redirty_for_io(inode, start, end);
+ add_async_extent(async_cow, start, end - start + 1, 0, NULL, 0,
+ BTRFS_COMPRESS_NONE);
+ *num_added += 1;
return;
@@ -712,7 +714,10 @@ retry:
async_extent->start,
async_extent->start +
async_extent->ram_size - 1,
- &page_started, &nr_written, 0);
+ async_extent->start +
+ async_extent->ram_size - 1,
+ &page_started, &nr_written, 0,
+ NULL);
/* JDM XXX */
@@ -739,7 +744,7 @@ retry:
lock_extent(io_tree, async_extent->start,
async_extent->start + async_extent->ram_size - 1);
- ret = btrfs_reserve_extent(root,
+ ret = btrfs_reserve_extent(root, async_extent->ram_size,
async_extent->compressed_size,
async_extent->compressed_size,
0, alloc_hint, &ins, 1, 1);
@@ -925,9 +930,9 @@ static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
*/
static noinline int cow_file_range(struct inode *inode,
struct page *locked_page,
- u64 start, u64 end, int *page_started,
- unsigned long *nr_written,
- int unlock)
+ u64 start, u64 end, u64 delalloc_end,
+ int *page_started, unsigned long *nr_written,
+ int unlock, struct btrfs_dedupe_hash *hash)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 alloc_hint = 0;
@@ -966,7 +971,8 @@ static noinline int cow_file_range(struct inode *inode,
EXTENT_DEFRAG, PAGE_UNLOCK |
PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
PAGE_END_WRITEBACK);
-
+ btrfs_free_reserved_data_space_noquota(inode, start,
+ end - start + 1);
*nr_written = *nr_written +
(end - start + PAGE_SIZE) / PAGE_SIZE;
*page_started = 1;
@@ -986,7 +992,7 @@ static noinline int cow_file_range(struct inode *inode,
unsigned long op;
cur_alloc_size = disk_num_bytes;
- ret = btrfs_reserve_extent(root, cur_alloc_size,
+ ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
root->sectorsize, 0, alloc_hint,
&ins, 1, 1);
if (ret < 0)
@@ -1156,7 +1162,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
async_cow->start = start;
if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS &&
- !btrfs_test_opt(root, FORCE_COMPRESS))
+ !btrfs_test_opt(root->fs_info, FORCE_COMPRESS))
cur_end = end;
else
cur_end = min(end, start + SZ_512K - 1);
@@ -1418,7 +1424,8 @@ out_check:
if (cow_start != (u64)-1) {
ret = cow_file_range(inode, locked_page,
cow_start, found_key.offset - 1,
- page_started, nr_written, 1);
+ end, page_started, nr_written, 1,
+ NULL);
if (ret) {
if (!nolock && nocow)
btrfs_end_write_no_snapshoting(root);
@@ -1485,8 +1492,10 @@ out_check:
extent_clear_unlock_delalloc(inode, cur_offset,
cur_offset + num_bytes - 1,
locked_page, EXTENT_LOCKED |
- EXTENT_DELALLOC, PAGE_UNLOCK |
- PAGE_SET_PRIVATE2);
+ EXTENT_DELALLOC |
+ EXTENT_CLEAR_DATA_RESV,
+ PAGE_UNLOCK | PAGE_SET_PRIVATE2);
+
if (!nolock && nocow)
btrfs_end_write_no_snapshoting(root);
cur_offset = extent_end;
@@ -1501,8 +1510,8 @@ out_check:
}
if (cow_start != (u64)-1) {
- ret = cow_file_range(inode, locked_page, cow_start, end,
- page_started, nr_written, 1);
+ ret = cow_file_range(inode, locked_page, cow_start, end, end,
+ page_started, nr_written, 1, NULL);
if (ret)
goto error;
}
@@ -1561,8 +1570,8 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
ret = run_delalloc_nocow(inode, locked_page, start, end,
page_started, 0, nr_written);
} else if (!inode_need_compress(inode)) {
- ret = cow_file_range(inode, locked_page, start, end,
- page_started, nr_written, 1);
+ ret = cow_file_range(inode, locked_page, start, end, end,
+ page_started, nr_written, 1, NULL);
} else {
set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
&BTRFS_I(inode)->runtime_flags);
@@ -1740,7 +1749,7 @@ static void btrfs_set_bit_hook(struct inode *inode,
}
/* For sanity tests */
- if (btrfs_test_is_dummy_root(root))
+ if (btrfs_is_testing(root->fs_info))
return;
__percpu_counter_add(&root->fs_info->delalloc_bytes, len,
@@ -1799,11 +1808,13 @@ static void btrfs_clear_bit_hook(struct inode *inode,
btrfs_delalloc_release_metadata(inode, len);
/* For sanity tests. */
- if (btrfs_test_is_dummy_root(root))
+ if (btrfs_is_testing(root->fs_info))
return;
if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
- && do_list && !(state->state & EXTENT_NORESERVE))
+ && do_list && !(state->state & EXTENT_NORESERVE)
+ && (*bits & (EXTENT_DO_ACCOUNTING |
+ EXTENT_CLEAR_DATA_RESV)))
btrfs_free_reserved_data_space_noquota(inode,
state->start, len);
@@ -1822,8 +1833,12 @@ static void btrfs_clear_bit_hook(struct inode *inode,
/*
* extent_io.c merge_bio_hook, this must check the chunk tree to make sure
* we don't create bios that span stripes or chunks
+ *
+ * return 1 if the page cannot be merged into the bio
+ * return 0 if the page can be merged into the bio
+ * return a negative error code otherwise
*/
-int btrfs_merge_bio_hook(int rw, struct page *page, unsigned long offset,
+int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
size_t size, struct bio *bio,
unsigned long bio_flags)
{
@@ -1838,10 +1853,10 @@ int btrfs_merge_bio_hook(int rw, struct page *page, unsigned long offset,
length = bio->bi_iter.bi_size;
map_length = length;
- ret = btrfs_map_block(root->fs_info, rw, logical,
+ ret = btrfs_map_block(root->fs_info, bio_op(bio), logical,
&map_length, NULL, 0);
- /* Will always return 0 with map_multi == NULL */
- BUG_ON(ret < 0);
+ if (ret < 0)
+ return ret;
if (map_length < length + size)
return 1;
return 0;
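
btrfs_merge_bio_hook() stops BUG()ing when btrfs_map_block() fails and propagates the error, giving the tri-state return the new comment documents. Condensed:

    ret = btrfs_map_block(root->fs_info, bio_op(bio), logical,
                          &map_length, NULL, 0);
    if (ret < 0)
            return ret;             /* was BUG_ON(ret < 0) */
    if (map_length < length + size)
            return 1;               /* would span a stripe or chunk */
    return 0;                       /* safe to merge */
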
@@ -1855,9 +1870,8 @@ int btrfs_merge_bio_hook(int rw, struct page *page, unsigned long offset,
* At IO completion time the cums attached on the ordered extent record
* are inserted into the btree
*/
-static int __btrfs_submit_bio_start(struct inode *inode, int rw,
- struct bio *bio, int mirror_num,
- unsigned long bio_flags,
+static int __btrfs_submit_bio_start(struct inode *inode, struct bio *bio,
+ int mirror_num, unsigned long bio_flags,
u64 bio_offset)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -1876,14 +1890,14 @@ static int __btrfs_submit_bio_start(struct inode *inode, int rw,
* At IO completion time the cums attached on the ordered extent record
* are inserted into the btree
*/
-static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
+static int __btrfs_submit_bio_done(struct inode *inode, struct bio *bio,
int mirror_num, unsigned long bio_flags,
u64 bio_offset)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
int ret;
- ret = btrfs_map_bio(root, rw, bio, mirror_num, 1);
+ ret = btrfs_map_bio(root, bio, mirror_num, 1);
if (ret) {
bio->bi_error = ret;
bio_endio(bio);
@@ -1895,7 +1909,7 @@ static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
* extent_io.c submission hook. This does the right thing for csum calculation
* on write, or reading the csums from the tree before a read
*/
-static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
+static int btrfs_submit_bio_hook(struct inode *inode, struct bio *bio,
int mirror_num, unsigned long bio_flags,
u64 bio_offset)
{
@@ -1910,7 +1924,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
if (btrfs_is_free_space_inode(inode))
metadata = BTRFS_WQ_ENDIO_FREE_SPACE;
- if (!(rw & REQ_WRITE)) {
+ if (bio_op(bio) != REQ_OP_WRITE) {
ret = btrfs_bio_wq_end_io(root->fs_info, bio, metadata);
if (ret)
goto out;
@@ -1932,7 +1946,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
goto mapit;
/* we're doing a write, do the async checksumming */
ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
- inode, rw, bio, mirror_num,
+ inode, bio, mirror_num,
bio_flags, bio_offset,
__btrfs_submit_bio_start,
__btrfs_submit_bio_done);
@@ -1944,7 +1958,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
}
mapit:
- ret = btrfs_map_bio(root, rw, bio, mirror_num, 0);
+ ret = btrfs_map_bio(root, bio, mirror_num, 0);
out:
if (ret < 0) {
@@ -2595,7 +2609,7 @@ again:
ret = btrfs_insert_empty_item(trans, root, path, &key,
sizeof(*extent));
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out_free_path;
}
@@ -2622,7 +2636,7 @@ again:
backref->root_id, backref->inum,
new->file_pos); /* start - extent_offset */
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out_free_path;
}
@@ -2891,7 +2905,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
ret = btrfs_update_inode_fallback(trans, root, inode);
if (ret) /* -ENOMEM or corruption */
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -2951,7 +2965,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
ordered_extent->file_offset, ordered_extent->len,
trans->transid);
if (ret < 0) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out_unlock;
}
@@ -2961,7 +2975,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
btrfs_ordered_update_i_size(inode, 0, ordered_extent);
ret = btrfs_update_inode_fallback(trans, root, inode);
if (ret) { /* -ENOMEM or corruption */
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out_unlock;
}
ret = 0;
@@ -3205,7 +3219,7 @@ void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
ret = btrfs_del_orphan_item(trans, root->fs_info->tree_root,
root->root_key.objectid);
if (ret)
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
else
clear_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED,
&root->state);
@@ -3296,7 +3310,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
if (ret != -EEXIST) {
clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
&BTRFS_I(inode)->runtime_flags);
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
return ret;
}
}
@@ -3308,7 +3322,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root,
root->root_key.objectid);
if (ret && ret != -EEXIST) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
return ret;
}
}
@@ -3428,10 +3442,10 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
found_key.offset = 0;
inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL);
ret = PTR_ERR_OR_ZERO(inode);
- if (ret && ret != -ESTALE)
+ if (ret && ret != -ENOENT)
goto out;
- if (ret == -ESTALE && root == root->fs_info->tree_root) {
+ if (ret == -ENOENT && root == root->fs_info->tree_root) {
struct btrfs_root *dead_root;
struct btrfs_fs_info *fs_info = root->fs_info;
int is_dead_root = 0;
@@ -3467,7 +3481,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
* Inode is already gone but the orphan item is still there,
* kill the orphan item.
*/
- if (ret == -ESTALE) {
+ if (ret == -ENOENT) {
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
@@ -3626,7 +3640,7 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf,
/*
* read an inode from the btree into the in-memory inode
*/
-static void btrfs_read_locked_inode(struct inode *inode)
+static int btrfs_read_locked_inode(struct inode *inode)
{
struct btrfs_path *path;
struct extent_buffer *leaf;
@@ -3645,14 +3659,19 @@ static void btrfs_read_locked_inode(struct inode *inode)
filled = true;
path = btrfs_alloc_path();
- if (!path)
+ if (!path) {
+ ret = -ENOMEM;
goto make_bad;
+ }
memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
- if (ret)
+ if (ret) {
+ if (ret > 0)
+ ret = -ENOENT;
goto make_bad;
+ }
leaf = path->nodes[0];
@@ -3805,11 +3824,12 @@ cache_acl:
}
btrfs_update_iflags(inode);
- return;
+ return 0;
make_bad:
btrfs_free_path(path);
make_bad_inode(inode);
+ return ret;
}
/*
@@ -4007,20 +4027,20 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
btrfs_info(root->fs_info,
"failed to delete reference to %.*s, inode %llu parent %llu",
name_len, name, ino, dir_ino);
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto err;
}
skip_backref:
ret = btrfs_delete_delayed_dir_index(trans, root, dir, index);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto err;
}
ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len,
inode, dir_ino);
if (ret != 0 && ret != -ENOENT) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto err;
}
@@ -4029,7 +4049,7 @@ skip_backref:
if (ret == -ENOENT)
ret = 0;
else if (ret)
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
err:
btrfs_free_path(path);
if (ret)
@@ -4143,7 +4163,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid);
ret = btrfs_delete_one_dir_name(trans, root, path, di);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
btrfs_release_path(path);
@@ -4153,7 +4173,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
dir_ino, &index, name, name_len);
if (ret < 0) {
if (ret != -ENOENT) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
di = btrfs_search_dir_index_item(root, path, dir_ino,
@@ -4163,7 +4183,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
ret = -ENOENT;
else
ret = PTR_ERR(di);
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -4176,7 +4196,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
ret = btrfs_delete_delayed_dir_index(trans, root, dir, index);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -4185,7 +4205,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb);
ret = btrfs_update_inode_fallback(trans, root, dir);
if (ret)
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
out:
btrfs_free_path(path);
return ret;
@@ -4197,6 +4217,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
int err = 0;
struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_trans_handle *trans;
+ u64 last_unlink_trans;
if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
return -ENOTEMPTY;
@@ -4219,11 +4240,27 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
if (err)
goto out;
+ last_unlink_trans = BTRFS_I(inode)->last_unlink_trans;
+
/* now the directory is empty */
err = btrfs_unlink_inode(trans, root, dir, d_inode(dentry),
dentry->d_name.name, dentry->d_name.len);
- if (!err)
+ if (!err) {
btrfs_i_size_write(inode, 0);
+ /*
+ * Propagate the last_unlink_trans value of the deleted dir to
+ * its parent directory. This is to prevent an unrecoverable
+ * log tree in the case we do something like this:
+ * 1) create dir foo
+ * 2) create snapshot under dir foo
+ * 3) delete the snapshot
+ * 4) rmdir foo
+ * 5) mkdir foo
+ * 6) fsync foo or some file inside foo
+ */
+ if (last_unlink_trans >= trans->transid)
+ BTRFS_I(dir)->last_unlink_trans = last_unlink_trans;
+ }
out:
btrfs_end_transaction(trans, root);
btrfs_btree_balance_dirty(root);
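
The comment block in this hunk describes why rmdir must push the victim directory's last_unlink_trans up to its parent. A tiny sketch of just that rule, using hypothetical stand-in structs (the real fields live in struct btrfs_inode and struct btrfs_trans_handle):

/* hypothetical stand-ins for the btrfs structures */
struct dir_state {
	unsigned long long last_unlink_trans;
};

struct trans_state {
	unsigned long long transid;
};

/*
 * On rmdir of 'victim' under 'parent': if the victim recorded an
 * unlink (e.g. a snapshot deletion) in the current or a newer
 * transaction, the parent must remember it so that a later fsync of
 * a re-created directory falls back to a full transaction commit
 * instead of replaying a stale log tree.
 */
static void propagate_last_unlink(struct dir_state *parent,
				  const struct dir_state *victim,
				  const struct trans_state *trans)
{
	if (victim->last_unlink_trans >= trans->transid)
		parent->last_unlink_trans = victim->last_unlink_trans;
}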
@@ -4506,7 +4543,6 @@ search_again:
pending_del_nr);
if (err) {
btrfs_abort_transaction(trans,
- root,
err);
goto error;
}
@@ -4518,8 +4554,7 @@ search_again:
item_end,
new_size);
if (err) {
- btrfs_abort_transaction(trans,
- root, err);
+ btrfs_abort_transaction(trans, err);
goto error;
}
} else if (test_bit(BTRFS_ROOT_REF_COWS,
@@ -4583,8 +4618,7 @@ delete:
pending_del_slot,
pending_del_nr);
if (ret) {
- btrfs_abort_transaction(trans,
- root, ret);
+ btrfs_abort_transaction(trans, ret);
goto error;
}
pending_del_nr = 0;
@@ -4617,7 +4651,7 @@ out:
ret = btrfs_del_items(trans, root, path, pending_del_slot,
pending_del_nr);
if (ret)
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
}
error:
if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
@@ -4786,7 +4820,7 @@ static int maybe_insert_hole(struct btrfs_root *root, struct inode *inode,
ret = btrfs_drop_extents(trans, root, inode, offset, offset + len, 1);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans, root);
return ret;
}
@@ -4794,7 +4828,7 @@ static int maybe_insert_hole(struct btrfs_root *root, struct inode *inode,
ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode), offset,
0, 0, len, 0, len, 0, 0, 0);
if (ret)
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
else
btrfs_update_inode(trans, root, inode);
btrfs_end_transaction(trans, root);
@@ -5021,7 +5055,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
i_size_write(inode, BTRFS_I(inode)->disk_i_size);
err = btrfs_orphan_del(trans, inode);
if (err)
- btrfs_abort_transaction(trans, root, err);
+ btrfs_abort_transaction(trans, err);
btrfs_end_transaction(trans, root);
}
}
@@ -5159,11 +5193,18 @@ void btrfs_evict_inode(struct inode *inode)
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_block_rsv *rsv, *global_rsv;
int steal_from_global = 0;
- u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
+ u64 min_size;
int ret;
trace_btrfs_inode_evict(inode);
+ if (!root) {
+ kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
+ return;
+ }
+
+ min_size = btrfs_calc_trunc_metadata_size(root, 1);
+
evict_inode_truncate_pages(inode);
if (inode->i_nlink &&
@@ -5264,7 +5305,7 @@ void btrfs_evict_inode(struct inode *inode)
if (steal_from_global) {
if (!btrfs_check_space_for_delayed_refs(trans, root))
ret = btrfs_block_rsv_migrate(global_rsv, rsv,
- min_size);
+ min_size, 0);
else
ret = -ENOSPC;
}
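
Earlier in btrfs_evict_inode() a NULL-root check was added to handle an inode whose setup never completed: there is nothing to truncate or un-reserve, so the only safe action is to free the raw allocation and return. A userspace sketch of that guard, with free() standing in for kmem_cache_free():

#include <stdlib.h>

struct fake_inode {
	void *root;	/* NULL if inode setup never completed */
};

static int evict_sketch(struct fake_inode *inode)
{
	if (!inode->root) {
		free(inode);	/* kmem_cache_free() in the real code */
		return 0;	/* nothing else is safe to touch */
	}
	/* ... normal eviction: truncate pages, drop reservations ... */
	return 1;
}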
@@ -5595,7 +5636,9 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
return ERR_PTR(-ENOMEM);
if (inode->i_state & I_NEW) {
- btrfs_read_locked_inode(inode);
+ int ret;
+
+ ret = btrfs_read_locked_inode(inode);
if (!is_bad_inode(inode)) {
inode_tree_add(inode);
unlock_new_inode(inode);
@@ -5604,7 +5647,8 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
} else {
unlock_new_inode(inode);
iput(inode);
- inode = ERR_PTR(-ESTALE);
+ ASSERT(ret < 0);
+ inode = ERR_PTR(ret < 0 ? ret : -ESTALE);
}
}
@@ -6240,9 +6284,9 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
btrfs_inherit_iflags(inode, dir);
if (S_ISREG(mode)) {
- if (btrfs_test_opt(root, NODATASUM))
+ if (btrfs_test_opt(root->fs_info, NODATASUM))
BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
- if (btrfs_test_opt(root, NODATACOW))
+ if (btrfs_test_opt(root->fs_info, NODATACOW))
BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW |
BTRFS_INODE_NODATASUM;
}
@@ -6320,7 +6364,7 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
if (ret == -EEXIST || ret == -EOVERFLOW)
goto fail_dir_item;
else if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
return ret;
}
@@ -6331,7 +6375,7 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
current_fs_time(parent_inode->i_sb);
ret = btrfs_update_inode(trans, root, parent_inode);
if (ret)
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
return ret;
fail_dir_item:
@@ -7214,7 +7258,7 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
int ret;
alloc_hint = get_extent_allocation_hint(inode, start, len);
- ret = btrfs_reserve_extent(root, len, root->sectorsize, 0,
+ ret = btrfs_reserve_extent(root, len, len, root->sectorsize, 0,
alloc_hint, &ins, 1, 1);
if (ret)
return ERR_PTR(ret);
@@ -7714,6 +7758,13 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
ret = PTR_ERR(em2);
goto unlock_err;
}
+ /*
+			 * For an inode marked NODATACOW or an extent marked
+			 * PREALLOC, we use the existing or preallocated extent,
+			 * so there is no need to adjust btrfs_space_info's
+			 * bytes_may_use.
+ */
+ btrfs_free_reserved_data_space_noquota(inode,
+ start, len);
goto unlock;
}
}
@@ -7748,7 +7799,6 @@ unlock:
i_size_write(inode, start + len);
adjust_dio_outstanding_extents(inode, dio_data, len);
- btrfs_free_reserved_data_space(inode, start, len);
WARN_ON(dio_data->reserve < len);
dio_data->reserve -= len;
dio_data->unsubmitted_oe_range_end = start + len;
@@ -7790,12 +7840,12 @@ err:
}
static inline int submit_dio_repair_bio(struct inode *inode, struct bio *bio,
- int rw, int mirror_num)
+ int mirror_num)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
int ret;
- BUG_ON(rw & REQ_WRITE);
+ BUG_ON(bio_op(bio) == REQ_OP_WRITE);
bio_get(bio);
@@ -7804,7 +7854,7 @@ static inline int submit_dio_repair_bio(struct inode *inode, struct bio *bio,
if (ret)
goto err;
- ret = btrfs_map_bio(root, rw, bio, mirror_num, 0);
+ ret = btrfs_map_bio(root, bio, mirror_num, 0);
err:
bio_put(bio);
return ret;
@@ -7855,7 +7905,7 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio,
int read_mode;
int ret;
- BUG_ON(failed_bio->bi_rw & REQ_WRITE);
+ BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
ret = btrfs_get_io_failure_record(inode, start, end, &failrec);
if (ret)
@@ -7883,13 +7933,13 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio,
free_io_failure(inode, failrec);
return -EIO;
}
+ bio_set_op_attrs(bio, REQ_OP_READ, read_mode);
btrfs_debug(BTRFS_I(inode)->root->fs_info,
"Repair DIO Read Error: submitting new dio read[%#x] to this_mirror=%d, in_validation=%d\n",
read_mode, failrec->this_mirror, failrec->in_validation);
- ret = submit_dio_repair_bio(inode, bio, read_mode,
- failrec->this_mirror);
+ ret = submit_dio_repair_bio(inode, bio, failrec->this_mirror);
if (ret) {
free_io_failure(inode, failrec);
bio_put(bio);
@@ -8179,7 +8229,7 @@ static void btrfs_endio_direct_write(struct bio *bio)
bio_put(bio);
}
-static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw,
+static int __btrfs_submit_bio_start_direct_io(struct inode *inode,
struct bio *bio, int mirror_num,
unsigned long bio_flags, u64 offset)
{
@@ -8197,8 +8247,8 @@ static void btrfs_end_dio_bio(struct bio *bio)
if (err)
btrfs_warn(BTRFS_I(dip->inode)->root->fs_info,
- "direct IO failed ino %llu rw %lu sector %#Lx len %u err no %d",
- btrfs_ino(dip->inode), bio->bi_rw,
+ "direct IO failed ino %llu rw %d,%u sector %#Lx len %u err no %d",
+ btrfs_ino(dip->inode), bio_op(bio), bio->bi_opf,
(unsigned long long)bio->bi_iter.bi_sector,
bio->bi_iter.bi_size, err);
@@ -8272,11 +8322,11 @@ static inline int btrfs_lookup_and_bind_dio_csum(struct btrfs_root *root,
}
static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
- int rw, u64 file_offset, int skip_sum,
+ u64 file_offset, int skip_sum,
int async_submit)
{
struct btrfs_dio_private *dip = bio->bi_private;
- int write = rw & REQ_WRITE;
+ bool write = bio_op(bio) == REQ_OP_WRITE;
struct btrfs_root *root = BTRFS_I(inode)->root;
int ret;
@@ -8297,8 +8347,7 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
if (write && async_submit) {
ret = btrfs_wq_submit_bio(root->fs_info,
- inode, rw, bio, 0, 0,
- file_offset,
+ inode, bio, 0, 0, file_offset,
__btrfs_submit_bio_start_direct_io,
__btrfs_submit_bio_done);
goto err;
@@ -8317,13 +8366,13 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
goto err;
}
map:
- ret = btrfs_map_bio(root, rw, bio, 0, async_submit);
+ ret = btrfs_map_bio(root, bio, 0, async_submit);
err:
bio_put(bio);
return ret;
}
-static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
+static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip,
int skip_sum)
{
struct inode *inode = dip->inode;
@@ -8342,8 +8391,8 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
int i;
map_length = orig_bio->bi_iter.bi_size;
- ret = btrfs_map_block(root->fs_info, rw, start_sector << 9,
- &map_length, NULL, 0);
+ ret = btrfs_map_block(root->fs_info, bio_op(orig_bio),
+ start_sector << 9, &map_length, NULL, 0);
if (ret)
return -EIO;
@@ -8363,6 +8412,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
if (!bio)
return -ENOMEM;
+ bio_set_op_attrs(bio, bio_op(orig_bio), orig_bio->bi_opf);
bio->bi_private = dip;
bio->bi_end_io = btrfs_end_dio_bio;
btrfs_io_bio(bio)->logical = file_offset;
@@ -8382,7 +8432,7 @@ next_block:
* before we're done setting it up
*/
atomic_inc(&dip->pending_bios);
- ret = __btrfs_submit_dio_bio(bio, inode, rw,
+ ret = __btrfs_submit_dio_bio(bio, inode,
file_offset, skip_sum,
async_submit);
if (ret) {
@@ -8400,12 +8450,13 @@ next_block:
start_sector, GFP_NOFS);
if (!bio)
goto out_err;
+ bio_set_op_attrs(bio, bio_op(orig_bio), orig_bio->bi_opf);
bio->bi_private = dip;
bio->bi_end_io = btrfs_end_dio_bio;
btrfs_io_bio(bio)->logical = file_offset;
map_length = orig_bio->bi_iter.bi_size;
- ret = btrfs_map_block(root->fs_info, rw,
+ ret = btrfs_map_block(root->fs_info, bio_op(orig_bio),
start_sector << 9,
&map_length, NULL, 0);
if (ret) {
@@ -8425,7 +8476,7 @@ next_block:
}
submit:
- ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum,
+ ret = __btrfs_submit_dio_bio(bio, inode, file_offset, skip_sum,
async_submit);
if (!ret)
return 0;
@@ -8445,14 +8496,14 @@ out_err:
return 0;
}
-static void btrfs_submit_direct(int rw, struct bio *dio_bio,
- struct inode *inode, loff_t file_offset)
+static void btrfs_submit_direct(struct bio *dio_bio, struct inode *inode,
+ loff_t file_offset)
{
struct btrfs_dio_private *dip = NULL;
struct bio *io_bio = NULL;
struct btrfs_io_bio *btrfs_bio;
int skip_sum;
- int write = rw & REQ_WRITE;
+ bool write = (bio_op(dio_bio) == REQ_OP_WRITE);
int ret = 0;
skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
@@ -8503,7 +8554,7 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,
dio_data->unsubmitted_oe_range_end;
}
- ret = btrfs_submit_direct_hook(rw, dip, skip_sum);
+ ret = btrfs_submit_direct_hook(dip, skip_sum);
if (!ret)
return;
@@ -9116,7 +9167,7 @@ static int btrfs_truncate(struct inode *inode)
/* Migrate the slack space for the truncate to our reserve */
ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv,
- min_size);
+ min_size, 0);
BUG_ON(ret);
/*
@@ -9156,7 +9207,7 @@ static int btrfs_truncate(struct inode *inode)
}
ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv,
- rsv, min_size);
+ rsv, min_size, 0);
BUG_ON(ret); /* shouldn't happen */
trans->block_rsv = rsv;
}
@@ -9177,7 +9228,6 @@ static int btrfs_truncate(struct inode *inode)
ret = btrfs_end_transaction(trans, root);
btrfs_btree_balance_dirty(root);
}
-
out:
btrfs_free_block_rsv(root, rsv);
@@ -9386,25 +9436,25 @@ int btrfs_init_cachep(void)
btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle",
sizeof(struct btrfs_trans_handle), 0,
- SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
+ SLAB_TEMPORARY | SLAB_MEM_SPREAD, NULL);
if (!btrfs_trans_handle_cachep)
goto fail;
btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction",
sizeof(struct btrfs_transaction), 0,
- SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
+ SLAB_TEMPORARY | SLAB_MEM_SPREAD, NULL);
if (!btrfs_transaction_cachep)
goto fail;
btrfs_path_cachep = kmem_cache_create("btrfs_path",
sizeof(struct btrfs_path), 0,
- SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
+ SLAB_MEM_SPREAD, NULL);
if (!btrfs_path_cachep)
goto fail;
btrfs_free_space_cachep = kmem_cache_create("btrfs_free_space",
sizeof(struct btrfs_free_space), 0,
- SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
+ SLAB_MEM_SPREAD, NULL);
if (!btrfs_free_space_cachep)
goto fail;
@@ -9554,7 +9604,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
ret = btrfs_update_inode(trans, root, old_inode);
}
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out_fail;
}
@@ -9574,7 +9624,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
ret = btrfs_update_inode(trans, dest, new_inode);
}
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out_fail;
}
@@ -9582,7 +9632,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
new_dentry->d_name.name,
new_dentry->d_name.len, 0, old_idx);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out_fail;
}
@@ -9590,7 +9640,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
old_dentry->d_name.name,
old_dentry->d_name.len, 0, new_idx);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out_fail;
}
@@ -9829,7 +9879,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
ret = btrfs_update_inode(trans, root, old_inode);
}
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out_fail;
}
@@ -9853,7 +9903,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (!ret && new_inode->i_nlink == 0)
ret = btrfs_orphan_add(trans, d_inode(new_dentry));
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out_fail;
}
}
@@ -9862,7 +9912,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
new_dentry->d_name.name,
new_dentry->d_name.len, 0, index);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out_fail;
}
@@ -9882,7 +9932,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
old_dentry);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out_fail;
}
}
@@ -10269,6 +10319,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
u64 last_alloc = (u64)-1;
int ret = 0;
bool own_trans = true;
+ u64 end = start + num_bytes - 1;
if (trans)
own_trans = false;
@@ -10290,8 +10341,8 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
* sized chunks.
*/
cur_bytes = min(cur_bytes, last_alloc);
- ret = btrfs_reserve_extent(root, cur_bytes, min_size, 0,
- *alloc_hint, &ins, 1, 0);
+ ret = btrfs_reserve_extent(root, cur_bytes, cur_bytes,
+ min_size, 0, *alloc_hint, &ins, 1, 0);
if (ret) {
if (own_trans)
btrfs_end_transaction(trans, root);
@@ -10308,7 +10359,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
if (ret) {
btrfs_free_reserved_extent(root, ins.objectid,
ins.offset, 0);
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
if (own_trans)
btrfs_end_transaction(trans, root);
break;
@@ -10368,7 +10419,7 @@ next:
ret = btrfs_update_inode(trans, root, inode);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
if (own_trans)
btrfs_end_transaction(trans, root);
break;
@@ -10377,6 +10428,9 @@ next:
if (own_trans)
btrfs_end_transaction(trans, root);
}
+ if (cur_offset < end)
+ btrfs_free_reserved_data_space(inode, cur_offset,
+ end - cur_offset + 1);
return ret;
}
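
The tail of __btrfs_prealloc_file_range() now releases whatever part of the up-front data reservation was never consumed: the loop reserves [start, end] once and, on any exit, frees [cur_offset, end]. A sketch of that bookkeeping invariant with hypothetical names:

/*
 * Reserve the whole range up front, advance 'cur' as extents are
 * allocated, release the unconsumed tail on exit.
 */
struct resv {
	unsigned long long reserved;	/* bytes still accounted */
};

static void release_range(struct resv *r, unsigned long long len)
{
	r->reserved -= len;
}

static void prealloc_exit_sketch(struct resv *r,
				 unsigned long long start,
				 unsigned long long num_bytes,
				 unsigned long long consumed)
{
	unsigned long long end = start + num_bytes - 1;
	unsigned long long cur = start + consumed;	/* loop exit point */

	if (cur < end)			/* mirrors the new hunk above */
		release_range(r, end - cur + 1);
}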
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 4ffcf0c27..7fd939bfb 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -561,7 +561,7 @@ static noinline int create_subvol(struct inode *dir,
new_root = btrfs_read_fs_root_no_name(root->fs_info, &key);
if (IS_ERR(new_root)) {
ret = PTR_ERR(new_root);
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto fail;
}
@@ -570,7 +570,7 @@ static noinline int create_subvol(struct inode *dir,
ret = btrfs_create_subvol_root(trans, new_root, root, new_dirid);
if (ret) {
/* We potentially lose an unused inode item here */
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto fail;
}
@@ -583,7 +583,7 @@ static noinline int create_subvol(struct inode *dir,
*/
ret = btrfs_set_inode_index(dir, &index);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto fail;
}
@@ -591,7 +591,7 @@ static noinline int create_subvol(struct inode *dir,
name, namelen, dir, &key,
BTRFS_FT_DIR, index);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto fail;
}
@@ -608,7 +608,7 @@ static noinline int create_subvol(struct inode *dir,
root_item->uuid, BTRFS_UUID_KEY_SUBVOL,
objectid);
if (ret)
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
fail:
kfree(root_item);
@@ -1957,8 +1957,7 @@ static noinline int key_in_sk(struct btrfs_key *key,
return 1;
}
-static noinline int copy_to_sk(struct btrfs_root *root,
- struct btrfs_path *path,
+static noinline int copy_to_sk(struct btrfs_path *path,
struct btrfs_key *key,
struct btrfs_ioctl_search_key *sk,
size_t *buf_size,
@@ -2129,7 +2128,7 @@ static noinline int search_ioctl(struct inode *inode,
ret = 0;
goto err;
}
- ret = copy_to_sk(root, path, &key, sk, buf_size, ubuf,
+ ret = copy_to_sk(path, &key, sk, buf_size, ubuf,
&sk_offset, &num_found);
btrfs_release_path(path);
if (ret)
@@ -2418,7 +2417,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
* rmdir(2).
*/
err = -EPERM;
- if (!btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED))
+ if (!btrfs_test_opt(root->fs_info, USER_SUBVOL_RM_ALLOWED))
goto out_dput;
/*
@@ -2501,7 +2500,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
dentry->d_name.len);
if (ret) {
err = ret;
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out_end_trans;
}
@@ -2517,7 +2516,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
root->fs_info->tree_root,
dest->root_key.objectid);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
err = ret;
goto out_end_trans;
}
@@ -2527,7 +2526,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
dest->root_item.uuid, BTRFS_UUID_KEY_SUBVOL,
dest->root_key.objectid);
if (ret && ret != -ENOENT) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
err = ret;
goto out_end_trans;
}
@@ -2537,7 +2536,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
BTRFS_UUID_KEY_RECEIVED_SUBVOL,
dest->root_key.objectid);
if (ret && ret != -ENOENT) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
err = ret;
goto out_end_trans;
}
@@ -3304,7 +3303,7 @@ static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
ret = btrfs_update_inode(trans, root, inode);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans, root);
goto out;
}
@@ -3706,7 +3705,7 @@ process_slot:
if (ret) {
if (ret != -EOPNOTSUPP)
btrfs_abort_transaction(trans,
- root, ret);
+ ret);
btrfs_end_transaction(trans, root);
goto out;
}
@@ -3714,8 +3713,7 @@ process_slot:
ret = btrfs_insert_empty_item(trans, root, path,
&new_key, size);
if (ret) {
- btrfs_abort_transaction(trans, root,
- ret);
+ btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans, root);
goto out;
}
@@ -3747,7 +3745,6 @@ process_slot:
new_key.offset - datao);
if (ret) {
btrfs_abort_transaction(trans,
- root,
ret);
btrfs_end_transaction(trans,
root);
@@ -3784,7 +3781,6 @@ process_slot:
if (ret) {
if (ret != -EOPNOTSUPP)
btrfs_abort_transaction(trans,
- root,
ret);
btrfs_end_transaction(trans, root);
goto out;
@@ -3840,7 +3836,7 @@ process_slot:
last_dest_end, destoff + len, 1);
if (ret) {
if (ret != -EOPNOTSUPP)
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans, root);
goto out;
}
@@ -5176,13 +5172,13 @@ static long _btrfs_ioctl_set_received_subvol(struct file *file,
BTRFS_UUID_KEY_RECEIVED_SUBVOL,
root->root_key.objectid);
if (ret < 0 && ret != -EEXIST) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
}
ret = btrfs_commit_transaction(trans, root);
if (ret < 0) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index aca8264f4..3b78d3817 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -1122,7 +1122,7 @@ int __init ordered_data_init(void)
{
btrfs_ordered_extent_cache = kmem_cache_create("btrfs_ordered_extent",
sizeof(struct btrfs_ordered_extent), 0,
- SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
+ SLAB_MEM_SPREAD,
NULL);
if (!btrfs_ordered_extent_cache)
return -ENOMEM;
diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c
index 36992128c..cf0b444ac 100644
--- a/fs/btrfs/props.c
+++ b/fs/btrfs/props.c
@@ -350,6 +350,7 @@ int btrfs_subvol_inherit_props(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_root *parent_root)
{
+ struct super_block *sb = root->fs_info->sb;
struct btrfs_key key;
struct inode *parent_inode, *child_inode;
int ret;
@@ -358,12 +359,11 @@ int btrfs_subvol_inherit_props(struct btrfs_trans_handle *trans,
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
- parent_inode = btrfs_iget(parent_root->fs_info->sb, &key,
- parent_root, NULL);
+ parent_inode = btrfs_iget(sb, &key, parent_root, NULL);
if (IS_ERR(parent_inode))
return PTR_ERR(parent_inode);
- child_inode = btrfs_iget(root->fs_info->sb, &key, root, NULL);
+ child_inode = btrfs_iget(sb, &key, root, NULL);
if (IS_ERR(child_inode)) {
iput(parent_inode);
return PTR_ERR(child_inode);
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 4904ebee4..8db2e29fd 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -571,7 +571,7 @@ static int add_qgroup_item(struct btrfs_trans_handle *trans,
struct extent_buffer *leaf;
struct btrfs_key key;
- if (btrfs_test_is_dummy_root(quota_root))
+ if (btrfs_is_testing(quota_root->fs_info))
return 0;
path = btrfs_alloc_path();
@@ -728,7 +728,7 @@ static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
int ret;
int slot;
- if (btrfs_test_is_dummy_root(root))
+ if (btrfs_is_testing(root->fs_info))
return 0;
key.objectid = 0;
@@ -1453,9 +1453,9 @@ int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans,
return ret;
}
-struct btrfs_qgroup_extent_record
-*btrfs_qgroup_insert_dirty_extent(struct btrfs_delayed_ref_root *delayed_refs,
- struct btrfs_qgroup_extent_record *record)
+int btrfs_qgroup_insert_dirty_extent_nolock(struct btrfs_fs_info *fs_info,
+ struct btrfs_delayed_ref_root *delayed_refs,
+ struct btrfs_qgroup_extent_record *record)
{
struct rb_node **p = &delayed_refs->dirty_extent_root.rb_node;
struct rb_node *parent_node = NULL;
@@ -1463,7 +1463,7 @@ struct btrfs_qgroup_extent_record
u64 bytenr = record->bytenr;
assert_spin_locked(&delayed_refs->lock);
- trace_btrfs_qgroup_insert_dirty_extent(record);
+ trace_btrfs_qgroup_insert_dirty_extent(fs_info, record);
while (*p) {
parent_node = *p;
@@ -1474,12 +1474,42 @@ struct btrfs_qgroup_extent_record
else if (bytenr > entry->bytenr)
p = &(*p)->rb_right;
else
- return entry;
+ return 1;
}
rb_link_node(&record->node, parent_node, p);
rb_insert_color(&record->node, &delayed_refs->dirty_extent_root);
- return NULL;
+ return 0;
+}
+
+int btrfs_qgroup_insert_dirty_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes,
+ gfp_t gfp_flag)
+{
+ struct btrfs_qgroup_extent_record *record;
+ struct btrfs_delayed_ref_root *delayed_refs;
+ int ret;
+
+ if (!fs_info->quota_enabled || bytenr == 0 || num_bytes == 0)
+ return 0;
+ if (WARN_ON(trans == NULL))
+ return -EINVAL;
+ record = kmalloc(sizeof(*record), gfp_flag);
+ if (!record)
+ return -ENOMEM;
+
+ delayed_refs = &trans->transaction->delayed_refs;
+ record->bytenr = bytenr;
+ record->num_bytes = num_bytes;
+ record->old_roots = NULL;
+
+ spin_lock(&delayed_refs->lock);
+ ret = btrfs_qgroup_insert_dirty_extent_nolock(fs_info, delayed_refs,
+ record);
+ spin_unlock(&delayed_refs->lock);
+ if (ret > 0)
+ kfree(record);
+ return 0;
}
#define UPDATE_NEW 0
@@ -1595,8 +1625,8 @@ static int qgroup_update_counters(struct btrfs_fs_info *fs_info,
cur_old_count = btrfs_qgroup_get_old_refcnt(qg, seq);
cur_new_count = btrfs_qgroup_get_new_refcnt(qg, seq);
- trace_qgroup_update_counters(qg->qgroupid, cur_old_count,
- cur_new_count);
+ trace_qgroup_update_counters(fs_info, qg->qgroupid,
+ cur_old_count, cur_new_count);
/* Rfer update part */
if (cur_old_count == 0 && cur_new_count > 0) {
@@ -1687,8 +1717,8 @@ btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
goto out_free;
BUG_ON(!fs_info->quota_root);
- trace_btrfs_qgroup_account_extent(bytenr, num_bytes, nr_old_roots,
- nr_new_roots);
+ trace_btrfs_qgroup_account_extent(fs_info, bytenr, num_bytes,
+ nr_old_roots, nr_new_roots);
qgroups = ulist_alloc(GFP_NOFS);
if (!qgroups) {
@@ -1759,7 +1789,7 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans,
record = rb_entry(node, struct btrfs_qgroup_extent_record,
node);
- trace_btrfs_qgroup_account_extents(record);
+ trace_btrfs_qgroup_account_extents(fs_info, record);
if (!ret) {
/*
@@ -2195,7 +2225,7 @@ void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
{
if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq)
return;
- btrfs_err(trans->root->fs_info,
+ btrfs_err(trans->fs_info,
"qgroups not uptodate in trans handle %p: list is%s empty, "
"seq is %#x.%x",
trans, list_empty(&trans->qgroup_ref_list) ? "" : " not",
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 3d73e4c9c..1bc64c864 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -64,9 +64,35 @@ void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info);
struct btrfs_delayed_extent_op;
int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
-struct btrfs_qgroup_extent_record
-*btrfs_qgroup_insert_dirty_extent(struct btrfs_delayed_ref_root *delayed_refs,
- struct btrfs_qgroup_extent_record *record);
+/*
+ * Insert one dirty extent record into @delayed_refs, informing qgroup to
+ * account that extent at commit trans time.
+ *
+ * Unlocked version: the caller must hold the delayed ref lock and
+ * have allocated @record beforehand.
+ *
+ * Return 0 on successful insert.
+ * Return >0 if the record already exists; the caller can then free
+ * @record safely.
+ * Errors are not possible.
+ */
+int btrfs_qgroup_insert_dirty_extent_nolock(
+ struct btrfs_fs_info *fs_info,
+ struct btrfs_delayed_ref_root *delayed_refs,
+ struct btrfs_qgroup_extent_record *record);
+
+/*
+ * Insert one dirty extent record into @delayed_refs, informing qgroup to
+ * account that extent at commit trans time.
+ *
+ * Better encapsulated version.
+ *
+ * Return 0 if the operation completed, whether the record was newly
+ * inserted or already existed.
+ * Return <0 on error, such as a memory allocation failure or an
+ * invalid parameter (NULL trans).
+ */
+int btrfs_qgroup_insert_dirty_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes,
+ gfp_t gfp_flag);
+
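
The new btrfs_qgroup_insert_dirty_extent() / _nolock() pair follows a classic "allocate outside the lock, insert under the lock, free on duplicate" idiom, with the nolock helper returning >0 for an existing record instead of a pointer. A userspace sketch of the same shape over a sorted singly linked list (a pthread mutex stands in for the delayed-refs spinlock; all names are hypothetical):

#include <pthread.h>
#include <stdlib.h>

struct record {
	unsigned long long bytenr;
	struct record *next;
};

static struct record *head;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

/* nolock flavor: caller holds the lock; >0 means "already present" */
static int insert_nolock(struct record *rec)
{
	struct record **p = &head;

	while (*p && (*p)->bytenr < rec->bytenr)
		p = &(*p)->next;
	if (*p && (*p)->bytenr == rec->bytenr)
		return 1;		/* caller may free rec safely */
	rec->next = *p;
	*p = rec;
	return 0;
}

/* encapsulated flavor: allocate outside the lock, free on duplicate */
static int insert_dirty_extent(unsigned long long bytenr)
{
	struct record *rec = malloc(sizeof(*rec));
	int ret;

	if (!rec)
		return -1;		/* -ENOMEM in the kernel version */
	rec->bytenr = bytenr;

	pthread_mutex_lock(&lock);
	ret = insert_nolock(rec);
	pthread_mutex_unlock(&lock);

	if (ret > 0)
		free(rec);		/* duplicate: record already tracked */
	return 0;
}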
int
btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
@@ -89,7 +115,7 @@ static inline void btrfs_qgroup_free_delayed_ref(struct btrfs_fs_info *fs_info,
u64 ref_root, u64 num_bytes)
{
btrfs_qgroup_free_refroot(fs_info, ref_root, num_bytes);
- trace_btrfs_qgroup_free_delayed_ref(ref_root, num_bytes);
+ trace_btrfs_qgroup_free_delayed_ref(fs_info, ref_root, num_bytes);
}
void assert_qgroups_uptodate(struct btrfs_trans_handle *trans);
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index f8b6d411a..cd8d302a1 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -1320,7 +1320,9 @@ write_data:
bio->bi_private = rbio;
bio->bi_end_io = raid_write_end_io;
- submit_bio(WRITE, bio);
+ bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+
+ submit_bio(bio);
}
return;
@@ -1573,11 +1575,12 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
bio->bi_private = rbio;
bio->bi_end_io = raid_rmw_end_io;
+ bio_set_op_attrs(bio, REQ_OP_READ, 0);
btrfs_bio_wq_end_io(rbio->fs_info, bio,
BTRFS_WQ_ENDIO_RAID56);
- submit_bio(READ, bio);
+ submit_bio(bio);
}
/* the actual write will happen once the reads are done */
return 0;
@@ -2097,11 +2100,12 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
bio->bi_private = rbio;
bio->bi_end_io = raid_recover_end_io;
+ bio_set_op_attrs(bio, REQ_OP_READ, 0);
btrfs_bio_wq_end_io(rbio->fs_info, bio,
BTRFS_WQ_ENDIO_RAID56);
- submit_bio(READ, bio);
+ submit_bio(bio);
}
out:
return 0;
@@ -2433,7 +2437,9 @@ submit_write:
bio->bi_private = rbio;
bio->bi_end_io = raid_write_end_io;
- submit_bio(WRITE, bio);
+ bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+
+ submit_bio(bio);
}
return;
@@ -2610,11 +2616,12 @@ static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio)
bio->bi_private = rbio;
bio->bi_end_io = raid56_parity_scrub_end_io;
+ bio_set_op_attrs(bio, REQ_OP_READ, 0);
btrfs_bio_wq_end_io(rbio->fs_info, bio,
BTRFS_WQ_ENDIO_RAID56);
- submit_bio(READ, bio);
+ submit_bio(bio);
}
/* the actual write will happen once the reads are done */
return;
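
All of the raid56 hunks above swap the old submit_bio(READ/WRITE, bio) calls for bio_set_op_attrs() followed by a one-argument submit_bio(): the direction now travels inside the bio itself, so later code can recover it with bio_op(). A userspace analogue of moving the op into the request (hypothetical names):

enum req_op { OP_READ, OP_WRITE };

struct request {
	enum req_op op;		/* direction now lives in the request */
	unsigned int flags;
};

/* analogue of bio_set_op_attrs() */
static void set_op_attrs(struct request *rq, enum req_op op,
			 unsigned int flags)
{
	rq->op = op;
	rq->flags = flags;
}

/* analogue of submit_bio(): no direction argument any more */
static int submit(struct request *rq)
{
	if (rq->op == OP_WRITE)
		return 0;	/* take the write path */
	return 1;		/* take the read path */
}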
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 0477dca15..c0c13dc6f 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -31,6 +31,7 @@
#include "async-thread.h"
#include "free-space-cache.h"
#include "inode-map.h"
+#include "qgroup.h"
/*
* backref_node, mapping_node and tree_block start with this
@@ -235,12 +236,12 @@ static void backref_cache_cleanup(struct backref_cache *cache)
cache->last_trans = 0;
for (i = 0; i < BTRFS_MAX_LEVEL; i++)
- BUG_ON(!list_empty(&cache->pending[i]));
- BUG_ON(!list_empty(&cache->changed));
- BUG_ON(!list_empty(&cache->detached));
- BUG_ON(!RB_EMPTY_ROOT(&cache->rb_root));
- BUG_ON(cache->nr_nodes);
- BUG_ON(cache->nr_edges);
+ ASSERT(list_empty(&cache->pending[i]));
+ ASSERT(list_empty(&cache->changed));
+ ASSERT(list_empty(&cache->detached));
+ ASSERT(RB_EMPTY_ROOT(&cache->rb_root));
+ ASSERT(!cache->nr_nodes);
+ ASSERT(!cache->nr_edges);
}
static struct backref_node *alloc_backref_node(struct backref_cache *cache)
@@ -1171,8 +1172,12 @@ out:
lower = list_entry(useless.next,
struct backref_node, list);
list_del_init(&lower->list);
+ if (lower == node)
+ node = NULL;
free_backref_node(cache, lower);
}
+
+ free_backref_node(cache, node);
return ERR_PTR(err);
}
ASSERT(!node || !node->detached);
@@ -1719,7 +1724,7 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
btrfs_header_owner(leaf),
key.objectid, key.offset);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
break;
}
@@ -1727,7 +1732,7 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
parent, btrfs_header_owner(leaf),
key.objectid, key.offset);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
break;
}
}
@@ -2604,25 +2609,28 @@ static int reserve_metadata_space(struct btrfs_trans_handle *trans,
trans->block_rsv = rc->block_rsv;
rc->reserved_bytes += num_bytes;
+
+ /*
+ * We are under a transaction here so we can only do limited flushing.
+ * If we get an enospc just kick back -EAGAIN so we know to drop the
+ * transaction and try to refill when we can flush all the things.
+ */
ret = btrfs_block_rsv_refill(root, rc->block_rsv, num_bytes,
- BTRFS_RESERVE_FLUSH_ALL);
+ BTRFS_RESERVE_FLUSH_LIMIT);
if (ret) {
- if (ret == -EAGAIN) {
- tmp = rc->extent_root->nodesize *
- RELOCATION_RESERVED_NODES;
- while (tmp <= rc->reserved_bytes)
- tmp <<= 1;
- /*
- * only one thread can access block_rsv at this point,
- * so we don't need hold lock to protect block_rsv.
- * we expand more reservation size here to allow enough
- * space for relocation and we will return earlier in
- * enospc case.
- */
- rc->block_rsv->size = tmp + rc->extent_root->nodesize *
- RELOCATION_RESERVED_NODES;
- }
- return ret;
+ tmp = rc->extent_root->nodesize * RELOCATION_RESERVED_NODES;
+ while (tmp <= rc->reserved_bytes)
+ tmp <<= 1;
+ /*
+		 * Only one thread can access block_rsv at this point, so we
+		 * don't need to hold a lock to protect it. We expand the
+		 * reservation size here to allow enough space for relocation,
+		 * and we will return earlier in the enospc case.
+ */
+ rc->block_rsv->size = tmp + rc->extent_root->nodesize *
+ RELOCATION_RESERVED_NODES;
+ return -EAGAIN;
}
return 0;
@@ -3030,15 +3038,19 @@ int prealloc_file_extent_cluster(struct inode *inode,
u64 num_bytes;
int nr = 0;
int ret = 0;
+ u64 prealloc_start = cluster->start - offset;
+ u64 prealloc_end = cluster->end - offset;
+ u64 cur_offset;
BUG_ON(cluster->start != cluster->boundary[0]);
inode_lock(inode);
- ret = btrfs_check_data_free_space(inode, cluster->start,
- cluster->end + 1 - cluster->start);
+ ret = btrfs_check_data_free_space(inode, prealloc_start,
+ prealloc_end + 1 - prealloc_start);
if (ret)
goto out;
+ cur_offset = prealloc_start;
while (nr < cluster->nr) {
start = cluster->boundary[nr] - offset;
if (nr + 1 < cluster->nr)
@@ -3048,16 +3060,21 @@ int prealloc_file_extent_cluster(struct inode *inode,
lock_extent(&BTRFS_I(inode)->io_tree, start, end);
num_bytes = end + 1 - start;
+ if (cur_offset < start)
+ btrfs_free_reserved_data_space(inode, cur_offset,
+ start - cur_offset);
ret = btrfs_prealloc_file_range(inode, 0, start,
num_bytes, num_bytes,
end + 1, &alloc_hint);
+ cur_offset = end + 1;
unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
if (ret)
break;
nr++;
}
- btrfs_free_reserved_data_space(inode, cluster->start,
- cluster->end + 1 - cluster->start);
+ if (cur_offset < prealloc_end)
+ btrfs_free_reserved_data_space(inode, cur_offset,
+ prealloc_end + 1 - cur_offset);
out:
inode_unlock(inode);
return ret;
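
prealloc_file_extent_cluster() now reserves the whole [prealloc_start, prealloc_end] span once and then, while walking the cluster boundaries, releases the hole in front of each extent plus the tail after the last one, instead of blindly freeing the entire span at the end. A sketch of that cursor logic (array-based, hypothetical names):

/*
 * Walk extent boundaries inside a reserved span and release the
 * unused gaps: [cur, start[i]) before each extent and the final tail.
 * Returns the total number of bytes released.
 */
static unsigned long long
release_gaps_sketch(const unsigned long long *start,
		    const unsigned long long *end, int nr,
		    unsigned long long span_start,
		    unsigned long long span_end)
{
	unsigned long long cur = span_start;
	unsigned long long freed = 0;
	int i;

	for (i = 0; i < nr; i++) {
		if (cur < start[i])		/* hole before this extent */
			freed += start[i] - cur;
		cur = end[i] + 1;		/* the extent stays reserved */
	}
	if (cur < span_end)			/* tail after the last extent */
		freed += span_end + 1 - cur;
	return freed;
}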
@@ -3871,6 +3888,7 @@ static noinline_for_stack
int prepare_to_relocate(struct reloc_control *rc)
{
struct btrfs_trans_handle *trans;
+ int ret;
rc->block_rsv = btrfs_alloc_block_rsv(rc->extent_root,
BTRFS_BLOCK_RSV_TEMP);
@@ -3885,6 +3903,11 @@ int prepare_to_relocate(struct reloc_control *rc)
rc->reserved_bytes = 0;
rc->block_rsv->size = rc->extent_root->nodesize *
RELOCATION_RESERVED_NODES;
+ ret = btrfs_block_rsv_refill(rc->extent_root,
+ rc->block_rsv, rc->block_rsv->size,
+ BTRFS_RESERVE_FLUSH_ALL);
+ if (ret)
+ return ret;
rc->create_reloc_tree = 1;
set_reloc_control(rc);
@@ -3903,6 +3926,90 @@ int prepare_to_relocate(struct reloc_control *rc)
return 0;
}
+/*
+ * Qgroup fixer for data chunk relocation.
+ * The data relocation is done in the following steps:
+ * 1) Copy data extents into the data reloc tree
+ * 2) Create a tree reloc tree (a special snapshot) for the related
+ *    subvolumes
+ * 3) Modify file extents in the tree reloc tree
+ * 4) Merge the tree reloc tree with the original fs tree, by swapping
+ *    tree blocks
+ *
+ * The problem is that the data and tree reloc trees are not accounted
+ * to qgroup, and step 4) only informs qgroup to track tree block
+ * changes, not the file extents inside those tree blocks.
+ *
+ * The good news is that the related data extents all live in the data
+ * reloc tree, so we only need to inform qgroup to track every file
+ * extent in the data reloc tree before committing the transaction.
+ */
+static int qgroup_fix_relocated_data_extents(struct btrfs_trans_handle *trans,
+ struct reloc_control *rc)
+{
+ struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
+ struct inode *inode = rc->data_inode;
+ struct btrfs_root *data_reloc_root = BTRFS_I(inode)->root;
+ struct btrfs_path *path;
+ struct btrfs_key key;
+ int ret = 0;
+
+ if (!fs_info->quota_enabled)
+ return 0;
+
+ /*
+	 * The qgroup fix is only valid for the stage where we update
+	 * data pointers.
+	 * In the MOVING_DATA stage we would miss the timing of the tree
+	 * block swap, so we don't fix it there.
+ */
+ if (!(rc->stage == UPDATE_DATA_PTRS && rc->extents_found))
+ return 0;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+ key.objectid = btrfs_ino(inode);
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = 0;
+
+ ret = btrfs_search_slot(NULL, data_reloc_root, &key, path, 0, 0);
+ if (ret < 0)
+ goto out;
+
+ lock_extent(&BTRFS_I(inode)->io_tree, 0, (u64)-1);
+ while (1) {
+ struct btrfs_file_extent_item *fi;
+
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+ if (key.objectid > btrfs_ino(inode))
+ break;
+ if (key.type != BTRFS_EXTENT_DATA_KEY)
+ goto next;
+ fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
+ struct btrfs_file_extent_item);
+ if (btrfs_file_extent_type(path->nodes[0], fi) !=
+ BTRFS_FILE_EXTENT_REG)
+ goto next;
+ ret = btrfs_qgroup_insert_dirty_extent(trans, fs_info,
+ btrfs_file_extent_disk_bytenr(path->nodes[0], fi),
+ btrfs_file_extent_disk_num_bytes(path->nodes[0], fi),
+ GFP_NOFS);
+ if (ret < 0)
+ break;
+next:
+ ret = btrfs_next_item(data_reloc_root, path);
+ if (ret < 0)
+ break;
+ if (ret > 0) {
+ ret = 0;
+ break;
+ }
+ }
+ unlock_extent(&BTRFS_I(inode)->io_tree, 0 , (u64)-1);
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
{
struct rb_root blocks = RB_ROOT;
@@ -4089,10 +4196,18 @@ restart:
/* get rid of pinned extents */
trans = btrfs_join_transaction(rc->extent_root);
- if (IS_ERR(trans))
+ if (IS_ERR(trans)) {
err = PTR_ERR(trans);
- else
- btrfs_commit_transaction(trans, rc->extent_root);
+ goto out_free;
+ }
+ ret = qgroup_fix_relocated_data_extents(trans, rc);
+ if (ret < 0) {
+ btrfs_abort_transaction(trans, ret);
+ if (!err)
+ err = ret;
+ goto out_free;
+ }
+ btrfs_commit_transaction(trans, rc->extent_root);
out_free:
btrfs_free_block_rsv(rc->extent_root, rc->block_rsv);
btrfs_free_path(path);
@@ -4455,10 +4570,16 @@ int btrfs_recover_relocation(struct btrfs_root *root)
unset_reloc_control(rc);
trans = btrfs_join_transaction(rc->extent_root);
- if (IS_ERR(trans))
+ if (IS_ERR(trans)) {
err = PTR_ERR(trans);
- else
- err = btrfs_commit_transaction(trans, rc->extent_root);
+ goto out_free;
+ }
+ err = qgroup_fix_relocated_data_extents(trans, rc);
+ if (err < 0) {
+ btrfs_abort_transaction(trans, err);
+ goto out_free;
+ }
+ err = btrfs_commit_transaction(trans, rc->extent_root);
out_free:
kfree(rc);
out:
@@ -4643,7 +4764,7 @@ int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
if (rc->merge_reloc_tree) {
ret = btrfs_block_rsv_migrate(&pending->block_rsv,
rc->block_rsv,
- rc->nodes_relocated);
+ rc->nodes_relocated, 1);
if (ret)
return ret;
}
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 3454aa4fa..091296062 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -150,7 +150,7 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
ret = btrfs_search_slot(trans, root, key, path, 0, 1);
if (ret < 0) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -176,20 +176,20 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
ret = btrfs_search_slot(trans, root, key, path,
-1, 1);
if (ret < 0) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
ret = btrfs_del_item(trans, root, path);
if (ret < 0) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
btrfs_release_path(path);
ret = btrfs_insert_empty_item(trans, root, path,
key, sizeof(*item));
if (ret < 0) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
l = path->nodes[0];
@@ -457,7 +457,7 @@ again:
ret = btrfs_insert_empty_item(trans, tree_root, path, &key,
sizeof(*ref) + name_len);
if (ret) {
- btrfs_abort_transaction(trans, tree_root, ret);
+ btrfs_abort_transaction(trans, ret);
btrfs_free_path(path);
return ret;
}
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 70427ef66..1d195d2b3 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -1504,8 +1504,9 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
sblock->no_io_error_seen = 0;
} else {
bio->bi_iter.bi_sector = page->physical >> 9;
+ bio_set_op_attrs(bio, REQ_OP_READ, 0);
- if (btrfsic_submit_bio_wait(READ, bio))
+ if (btrfsic_submit_bio_wait(bio))
sblock->no_io_error_seen = 0;
}
@@ -1583,6 +1584,7 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
return -EIO;
bio->bi_bdev = page_bad->dev->bdev;
bio->bi_iter.bi_sector = page_bad->physical >> 9;
+ bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0);
if (PAGE_SIZE != ret) {
@@ -1590,7 +1592,7 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
return -EIO;
}
- if (btrfsic_submit_bio_wait(WRITE, bio)) {
+ if (btrfsic_submit_bio_wait(bio)) {
btrfs_dev_stat_inc_and_print(page_bad->dev,
BTRFS_DEV_STAT_WRITE_ERRS);
btrfs_dev_replace_stats_inc(
@@ -1684,6 +1686,7 @@ again:
bio->bi_end_io = scrub_wr_bio_end_io;
bio->bi_bdev = sbio->dev->bdev;
bio->bi_iter.bi_sector = sbio->physical >> 9;
+ bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
sbio->err = 0;
} else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
spage->physical_for_dev_replace ||
@@ -1731,7 +1734,7 @@ static void scrub_wr_submit(struct scrub_ctx *sctx)
* orders the requests before sending them to the driver which
* doubled the write performance on spinning disks when measured
* with Linux 3.5 */
- btrfsic_submit_bio(WRITE, sbio->bio);
+ btrfsic_submit_bio(sbio->bio);
}
static void scrub_wr_bio_end_io(struct bio *bio)
@@ -2041,7 +2044,7 @@ static void scrub_submit(struct scrub_ctx *sctx)
sbio = sctx->bios[sctx->curr];
sctx->curr = -1;
scrub_pending_bio_inc(sctx);
- btrfsic_submit_bio(READ, sbio->bio);
+ btrfsic_submit_bio(sbio->bio);
}
static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
@@ -2088,6 +2091,7 @@ again:
bio->bi_end_io = scrub_bio_end_io;
bio->bi_bdev = sbio->dev->bdev;
bio->bi_iter.bi_sector = sbio->physical >> 9;
+ bio_set_op_attrs(bio, REQ_OP_READ, 0);
sbio->err = 0;
} else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
spage->physical ||
@@ -3781,27 +3785,27 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
if (fs_info->scrub_workers_refcnt == 0) {
if (is_dev_replace)
fs_info->scrub_workers =
- btrfs_alloc_workqueue("scrub", flags,
+ btrfs_alloc_workqueue(fs_info, "scrub", flags,
1, 4);
else
fs_info->scrub_workers =
- btrfs_alloc_workqueue("scrub", flags,
+ btrfs_alloc_workqueue(fs_info, "scrub", flags,
max_active, 4);
if (!fs_info->scrub_workers)
goto fail_scrub_workers;
fs_info->scrub_wr_completion_workers =
- btrfs_alloc_workqueue("scrubwrc", flags,
+ btrfs_alloc_workqueue(fs_info, "scrubwrc", flags,
max_active, 2);
if (!fs_info->scrub_wr_completion_workers)
goto fail_scrub_wr_completion_workers;
fs_info->scrub_nocow_workers =
- btrfs_alloc_workqueue("scrubnc", flags, 1, 0);
+ btrfs_alloc_workqueue(fs_info, "scrubnc", flags, 1, 0);
if (!fs_info->scrub_nocow_workers)
goto fail_scrub_nocow_workers;
fs_info->scrub_parity_workers =
- btrfs_alloc_workqueue("scrubparity", flags,
+ btrfs_alloc_workqueue(fs_info, "scrubparity", flags,
max_active, 2);
if (!fs_info->scrub_parity_workers)
goto fail_scrub_parity_workers;
@@ -3856,7 +3860,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
if (fs_info->chunk_root->sectorsize != PAGE_SIZE) {
/* not supported for data w/o checksums */
- btrfs_err(fs_info,
+ btrfs_err_rl(fs_info,
"scrub: size assumption sectorsize != PAGE_SIZE "
"(%d != %lu) fails",
fs_info->chunk_root->sectorsize, PAGE_SIZE);
@@ -4436,6 +4440,7 @@ static int write_page_nocow(struct scrub_ctx *sctx,
bio->bi_iter.bi_size = 0;
bio->bi_iter.bi_sector = physical_for_dev_replace >> 9;
bio->bi_bdev = dev->bdev;
+ bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_SYNC);
ret = bio_add_page(bio, page, PAGE_SIZE, 0);
if (ret != PAGE_SIZE) {
leave_with_eio:
@@ -4444,7 +4449,7 @@ leave_with_eio:
return -EIO;
}
- if (btrfsic_submit_bio_wait(WRITE_SYNC, bio))
+ if (btrfsic_submit_bio_wait(bio))
goto leave_with_eio;
bio_put(bio);
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index b71dd2983..a87675ffd 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -231,7 +231,6 @@ struct pending_dir_move {
u64 parent_ino;
u64 ino;
u64 gen;
- bool is_orphan;
struct list_head update_refs;
};
@@ -274,6 +273,39 @@ struct name_cache_entry {
char name[];
};
+static void inconsistent_snapshot_error(struct send_ctx *sctx,
+ enum btrfs_compare_tree_result result,
+ const char *what)
+{
+ const char *result_string;
+
+ switch (result) {
+ case BTRFS_COMPARE_TREE_NEW:
+ result_string = "new";
+ break;
+ case BTRFS_COMPARE_TREE_DELETED:
+ result_string = "deleted";
+ break;
+ case BTRFS_COMPARE_TREE_CHANGED:
+ result_string = "updated";
+ break;
+ case BTRFS_COMPARE_TREE_SAME:
+ ASSERT(0);
+ result_string = "unchanged";
+ break;
+ default:
+ ASSERT(0);
+ result_string = "unexpected";
+ }
+
+ btrfs_err(sctx->send_root->fs_info,
+ "Send: inconsistent snapshot, found %s %s for inode %llu without updated inode item, send root is %llu, parent root is %llu",
+ result_string, what, sctx->cmp_key->objectid,
+ sctx->send_root->root_key.objectid,
+ (sctx->parent_root ?
+ sctx->parent_root->root_key.objectid : 0));
+}
+
static int is_waiting_for_move(struct send_ctx *sctx, u64 ino);
static struct waiting_dir_move *
@@ -1861,7 +1893,8 @@ static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
* was already unlinked/moved, so we can safely assume that we will not
* overwrite anything at this point in time.
*/
- if (other_inode > sctx->send_progress) {
+ if (other_inode > sctx->send_progress ||
+ is_waiting_for_move(sctx, other_inode)) {
ret = get_inode_info(sctx->parent_root, other_inode, NULL,
who_gen, NULL, NULL, NULL, NULL);
if (ret < 0)
@@ -2502,6 +2535,8 @@ verbose_printk("btrfs: send_utimes %llu\n", ino);
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0);
+ if (ret > 0)
+ ret = -ENOENT;
if (ret < 0)
goto out;
@@ -2947,6 +2982,10 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen,
}
if (loc.objectid > send_progress) {
+ struct orphan_dir_info *odi;
+
+ odi = get_orphan_dir_info(sctx, dir);
+ free_orphan_dir_info(sctx, odi);
ret = 0;
goto out;
}
@@ -3047,7 +3086,6 @@ static int add_pending_dir_move(struct send_ctx *sctx,
pm->parent_ino = parent_ino;
pm->ino = ino;
pm->gen = ino_gen;
- pm->is_orphan = is_orphan;
INIT_LIST_HEAD(&pm->list);
INIT_LIST_HEAD(&pm->update_refs);
RB_CLEAR_NODE(&pm->node);
@@ -3113,6 +3151,48 @@ static struct pending_dir_move *get_pending_dir_moves(struct send_ctx *sctx,
return NULL;
}
+static int path_loop(struct send_ctx *sctx, struct fs_path *name,
+ u64 ino, u64 gen, u64 *ancestor_ino)
+{
+ int ret = 0;
+ u64 parent_inode = 0;
+ u64 parent_gen = 0;
+ u64 start_ino = ino;
+
+ *ancestor_ino = 0;
+ while (ino != BTRFS_FIRST_FREE_OBJECTID) {
+ fs_path_reset(name);
+
+ if (is_waiting_for_rm(sctx, ino))
+ break;
+ if (is_waiting_for_move(sctx, ino)) {
+ if (*ancestor_ino == 0)
+ *ancestor_ino = ino;
+ ret = get_first_ref(sctx->parent_root, ino,
+ &parent_inode, &parent_gen, name);
+ } else {
+ ret = __get_cur_name_and_parent(sctx, ino, gen,
+ &parent_inode,
+ &parent_gen, name);
+ if (ret > 0) {
+ ret = 0;
+ break;
+ }
+ }
+ if (ret < 0)
+ break;
+ if (parent_inode == start_ino) {
+ ret = 1;
+ if (*ancestor_ino == 0)
+ *ancestor_ino = ino;
+ break;
+ }
+ ino = parent_inode;
+ gen = parent_gen;
+ }
+ return ret;
+}
+
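
path_loop() above walks up the parent chain of an inode with a pending rename and reports whether the chain leads back to the starting inode, which would make the rename create a cycle; it also remembers the first ancestor that is itself waiting to be moved. A simplified sketch over an array-based parent map (the real code resolves parents through the send and parent roots; names are hypothetical):

/*
 * Follow parent[] from ino toward root; return 1 if the walk comes
 * back to the starting inode (a rename cycle), 0 otherwise. The real
 * path_loop() only records *ancestor for inodes that are themselves
 * waiting on a pending rename.
 */
static int path_loop_sketch(const int *parent, int ino, int root,
			    int *ancestor)
{
	int start = ino;

	*ancestor = 0;
	while (ino != root) {
		if (*ancestor == 0)
			*ancestor = ino;	/* first pending ancestor */
		if (parent[ino] == start)
			return 1;		/* cycle back to start */
		ino = parent[ino];
	}
	return 0;				/* reached root: no loop */
}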
static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
{
struct fs_path *from_path = NULL;
@@ -3123,6 +3203,8 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
u64 parent_ino, parent_gen;
struct waiting_dir_move *dm = NULL;
u64 rmdir_ino = 0;
+ u64 ancestor;
+ bool is_orphan;
int ret;
name = fs_path_alloc();
@@ -3135,9 +3217,10 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
dm = get_waiting_dir_move(sctx, pm->ino);
ASSERT(dm);
rmdir_ino = dm->rmdir_ino;
+ is_orphan = dm->orphanized;
free_waiting_dir_move(sctx, dm);
- if (pm->is_orphan) {
+ if (is_orphan) {
ret = gen_unique_name(sctx, pm->ino,
pm->gen, from_path);
} else {
@@ -3155,6 +3238,24 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
goto out;
sctx->send_progress = sctx->cur_ino + 1;
+ ret = path_loop(sctx, name, pm->ino, pm->gen, &ancestor);
+ if (ret < 0)
+ goto out;
+ if (ret) {
+ LIST_HEAD(deleted_refs);
+ ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID);
+ ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor,
+ &pm->update_refs, &deleted_refs,
+ is_orphan);
+ if (ret < 0)
+ goto out;
+ if (rmdir_ino) {
+ dm = get_waiting_dir_move(sctx, pm->ino);
+ ASSERT(dm);
+ dm->rmdir_ino = rmdir_ino;
+ }
+ goto out;
+ }
fs_path_reset(name);
to_path = name;
name = NULL;
@@ -3174,7 +3275,7 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
/* already deleted */
goto finish;
}
- ret = can_rmdir(sctx, rmdir_ino, odi->gen, sctx->cur_ino + 1);
+ ret = can_rmdir(sctx, rmdir_ino, odi->gen, sctx->cur_ino);
if (ret < 0)
goto out;
if (!ret)
@@ -3204,8 +3305,18 @@ finish:
* and old parent(s).
*/
list_for_each_entry(cur, &pm->update_refs, list) {
- if (cur->dir == rmdir_ino)
+ /*
+ * The parent inode might have been deleted in the send snapshot
+ */
+ ret = get_inode_info(sctx->send_root, cur->dir, NULL,
+ NULL, NULL, NULL, NULL, NULL);
+ if (ret == -ENOENT) {
+ ret = 0;
continue;
+ }
+ if (ret < 0)
+ goto out;
+
ret = send_utimes(sctx, cur->dir, cur->dir_gen);
if (ret < 0)
goto out;
@@ -3325,6 +3436,7 @@ static int wait_for_dest_dir_move(struct send_ctx *sctx,
u64 left_gen;
u64 right_gen;
int ret = 0;
+ struct waiting_dir_move *wdm;
if (RB_EMPTY_ROOT(&sctx->waiting_dir_moves))
return 0;
@@ -3383,7 +3495,8 @@ static int wait_for_dest_dir_move(struct send_ctx *sctx,
goto out;
}
- if (is_waiting_for_move(sctx, di_key.objectid)) {
+ wdm = get_waiting_dir_move(sctx, di_key.objectid);
+ if (wdm && !wdm->orphanized) {
ret = add_pending_dir_move(sctx,
sctx->cur_ino,
sctx->cur_inode_gen,
@@ -3470,7 +3583,8 @@ static int wait_for_parent_move(struct send_ctx *sctx,
ret = is_ancestor(sctx->parent_root,
sctx->cur_ino, sctx->cur_inode_gen,
ino, path_before);
- break;
+ if (ret)
+ break;
}
fs_path_reset(path_before);
@@ -3643,11 +3757,26 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
goto out;
if (ret) {
struct name_cache_entry *nce;
+ struct waiting_dir_move *wdm;
ret = orphanize_inode(sctx, ow_inode, ow_gen,
cur->full_path);
if (ret < 0)
goto out;
+
+ /*
+ * If ow_inode has its rename operation delayed
+ * make sure that its orphanized name is used in
+ * the source path when performing its rename
+ * operation.
+ */
+ if (is_waiting_for_move(sctx, ow_inode)) {
+ wdm = get_waiting_dir_move(sctx,
+ ow_inode);
+ ASSERT(wdm);
+ wdm->orphanized = true;
+ }
+
/*
* Make sure we clear our orphanized inode's
* name from the name cache. This is because the
@@ -3663,6 +3792,19 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
name_cache_delete(sctx, nce);
kfree(nce);
}
+
+ /*
+ * ow_inode might currently be an ancestor of
+ * cur_ino, therefore compute valid_path (the
+ * current path of cur_ino) again because it
+ * might contain the pre-orphanization name of
+ * ow_inode, which is no longer valid.
+ */
+ fs_path_reset(valid_path);
+ ret = get_cur_path(sctx, sctx->cur_ino,
+ sctx->cur_inode_gen, valid_path);
+ if (ret < 0)
+ goto out;
} else {
ret = send_unlink(sctx, cur->full_path);
if (ret < 0)
@@ -4126,10 +4268,12 @@ static int process_all_refs(struct send_ctx *sctx,
}
btrfs_release_path(path);
+ /*
+ * We don't actually care about pending_move as we are simply
+	 * re-creating this inode and will be renaming it into place once we
+ * rename the parent directory.
+ */
ret = process_recorded_refs(sctx, &pending_move);
- /* Only applicable to an incremental send. */
- ASSERT(pending_move == 0);
-
out:
btrfs_free_path(path);
return ret;
@@ -5602,7 +5746,10 @@ static int changed_ref(struct send_ctx *sctx,
{
int ret = 0;
- BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
+ if (sctx->cur_ino != sctx->cmp_key->objectid) {
+ inconsistent_snapshot_error(sctx, result, "reference");
+ return -EIO;
+ }
if (!sctx->cur_inode_new_gen &&
sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) {
@@ -5627,7 +5774,10 @@ static int changed_xattr(struct send_ctx *sctx,
{
int ret = 0;
- BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
+ if (sctx->cur_ino != sctx->cmp_key->objectid) {
+ inconsistent_snapshot_error(sctx, result, "xattr");
+ return -EIO;
+ }
if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
if (result == BTRFS_COMPARE_TREE_NEW)
@@ -5651,7 +5801,10 @@ static int changed_extent(struct send_ctx *sctx,
{
int ret = 0;
- BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
+ if (sctx->cur_ino != sctx->cmp_key->objectid) {
+ inconsistent_snapshot_error(sctx, result, "extent");
+ return -EIO;
+ }
if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
if (result != BTRFS_COMPARE_TREE_DELETED)
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 60e7179ed..4071fe2bd 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -184,6 +184,22 @@ static const char * const logtypes[] = {
"debug",
};
+
+/*
+ * Use one ratelimit state per log level so that a flood of less important
+ * messages doesn't cause more important ones to be dropped.
+ */
+static struct ratelimit_state printk_limits[] = {
+ RATELIMIT_STATE_INIT(printk_limits[0], DEFAULT_RATELIMIT_INTERVAL, 100),
+ RATELIMIT_STATE_INIT(printk_limits[1], DEFAULT_RATELIMIT_INTERVAL, 100),
+ RATELIMIT_STATE_INIT(printk_limits[2], DEFAULT_RATELIMIT_INTERVAL, 100),
+ RATELIMIT_STATE_INIT(printk_limits[3], DEFAULT_RATELIMIT_INTERVAL, 100),
+ RATELIMIT_STATE_INIT(printk_limits[4], DEFAULT_RATELIMIT_INTERVAL, 100),
+ RATELIMIT_STATE_INIT(printk_limits[5], DEFAULT_RATELIMIT_INTERVAL, 100),
+ RATELIMIT_STATE_INIT(printk_limits[6], DEFAULT_RATELIMIT_INTERVAL, 100),
+ RATELIMIT_STATE_INIT(printk_limits[7], DEFAULT_RATELIMIT_INTERVAL, 100),
+};
+
void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
{
struct super_block *sb = fs_info->sb;
@@ -192,6 +208,7 @@ void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
va_list args;
const char *type = logtypes[4];
int kern_level;
+ struct ratelimit_state *ratelimit;
va_start(args, fmt);
@@ -202,13 +219,18 @@ void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
lvl[size] = '\0';
fmt += size;
type = logtypes[kern_level - '0'];
- } else
+ ratelimit = &printk_limits[kern_level - '0'];
+ } else {
*lvl = '\0';
+ /* Default to debug output */
+ ratelimit = &printk_limits[7];
+ }
vaf.fmt = fmt;
vaf.va = &args;
- printk("%sBTRFS %s (device %s): %pV\n", lvl, type, sb->s_id, &vaf);
+ if (__ratelimit(ratelimit))
+ printk("%sBTRFS %s (device %s): %pV\n", lvl, type, sb->s_id, &vaf);
va_end(args);
}
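
btrfs_printk() now picks one ratelimit bucket per KERN_* level (defaulting to the debug bucket), so a burst of low-priority messages cannot exhaust the budget of errors and warnings. A userspace token-window analogue of that per-level scheme (hypothetical names; the kernel uses struct ratelimit_state and __ratelimit()):

#include <stdbool.h>
#include <time.h>

struct rl_state {
	time_t window_start;
	int count;
};

#define RL_INTERVAL 5	/* seconds; DEFAULT_RATELIMIT_INTERVAL is 5*HZ */
#define RL_BURST  100	/* matches the burst used in printk_limits[] */

/* one bucket per log level, so levels cannot starve each other */
static struct rl_state limits[8];

static bool rl_allow(int level)
{
	struct rl_state *rs = &limits[level];
	time_t now = time(NULL);

	if (now - rs->window_start >= RL_INTERVAL) {
		rs->window_start = now;	/* open a fresh window */
		rs->count = 0;
	}
	return rs->count++ < RL_BURST;
}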
@@ -229,9 +251,11 @@ void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
*/
__cold
void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, const char *function,
+ const char *function,
unsigned int line, int errno)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
+
trans->aborted = errno;
/* Nothing used. The other threads that have joined this
* transaction may be able to continue. */
@@ -239,16 +263,16 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
const char *errstr;
errstr = btrfs_decode_error(errno);
- btrfs_warn(root->fs_info,
+ btrfs_warn(fs_info,
"%s:%d: Aborting unused transaction(%s).",
function, line, errstr);
return;
}
ACCESS_ONCE(trans->transaction->aborted) = errno;
/* Wake up anybody who may be waiting on this transaction */
- wake_up(&root->fs_info->transaction_wait);
- wake_up(&root->fs_info->transaction_blocked_wait);
- __btrfs_handle_fs_error(root->fs_info, function, line, errno, NULL);
+ wake_up(&fs_info->transaction_wait);
+ wake_up(&fs_info->transaction_blocked_wait);
+ __btrfs_handle_fs_error(fs_info, function, line, errno, NULL);
}
/*
* __btrfs_panic decodes unexpected, fatal errors from the caller,
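
Dropping the root argument works because the handle now carries its fs_info, while the caller's function and line still arrive through a wrapper macro. A hedged sketch of that call-site-capture idiom; abort_trans() and ABORT_TRANS() are invented names, and the real btrfs_abort_transaction() macro in ctree.h does more:

/* Sketch of capturing __func__/__LINE__ at the call site so callers
 * need only pass the handle and the errno. */
#include <stdio.h>

struct trans { int aborted; };

static void abort_trans(struct trans *t, const char *function,
			unsigned int line, int errno_)
{
	t->aborted = errno_;
	fprintf(stderr, "%s:%u: aborting transaction (error %d)\n",
		function, line, errno_);
}

#define ABORT_TRANS(t, err) \
	abort_trans((t), __func__, __LINE__, (err))

int main(void)
{
	struct trans t = { 0 };

	ABORT_TRANS(&t, -5);	/* expands with this function and line */
	return t.aborted ? 0 : 1;
}
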
@@ -432,12 +456,12 @@ int btrfs_parse_options(struct btrfs_root *root, char *options,
*/
break;
case Opt_nodatasum:
- btrfs_set_and_info(root, NODATASUM,
+ btrfs_set_and_info(info, NODATASUM,
"setting nodatasum");
break;
case Opt_datasum:
- if (btrfs_test_opt(root, NODATASUM)) {
- if (btrfs_test_opt(root, NODATACOW))
+ if (btrfs_test_opt(info, NODATASUM)) {
+ if (btrfs_test_opt(info, NODATACOW))
btrfs_info(root->fs_info, "setting datasum, datacow enabled");
else
btrfs_info(root->fs_info, "setting datasum");
@@ -446,9 +470,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options,
btrfs_clear_opt(info->mount_opt, NODATASUM);
break;
case Opt_nodatacow:
- if (!btrfs_test_opt(root, NODATACOW)) {
- if (!btrfs_test_opt(root, COMPRESS) ||
- !btrfs_test_opt(root, FORCE_COMPRESS)) {
+ if (!btrfs_test_opt(info, NODATACOW)) {
+ if (!btrfs_test_opt(info, COMPRESS) ||
+ !btrfs_test_opt(info, FORCE_COMPRESS)) {
btrfs_info(root->fs_info,
"setting nodatacow, compression disabled");
} else {
@@ -461,7 +485,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options,
btrfs_set_opt(info->mount_opt, NODATASUM);
break;
case Opt_datacow:
- btrfs_clear_and_info(root, NODATACOW,
+ btrfs_clear_and_info(info, NODATACOW,
"setting datacow");
break;
case Opt_compress_force:
@@ -470,10 +494,11 @@ int btrfs_parse_options(struct btrfs_root *root, char *options,
/* Fallthrough */
case Opt_compress:
case Opt_compress_type:
- saved_compress_type = btrfs_test_opt(root, COMPRESS) ?
+ saved_compress_type = btrfs_test_opt(info,
+ COMPRESS) ?
info->compress_type : BTRFS_COMPRESS_NONE;
saved_compress_force =
- btrfs_test_opt(root, FORCE_COMPRESS);
+ btrfs_test_opt(info, FORCE_COMPRESS);
if (token == Opt_compress ||
token == Opt_compress_force ||
strcmp(args[0].from, "zlib") == 0) {
@@ -513,10 +538,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options,
*/
btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
}
- if ((btrfs_test_opt(root, COMPRESS) &&
+ if ((btrfs_test_opt(info, COMPRESS) &&
(info->compress_type != saved_compress_type ||
compress_force != saved_compress_force)) ||
- (!btrfs_test_opt(root, COMPRESS) &&
+ (!btrfs_test_opt(info, COMPRESS) &&
no_compress == 1)) {
btrfs_info(root->fs_info,
"%s %s compression",
@@ -526,25 +551,25 @@ int btrfs_parse_options(struct btrfs_root *root, char *options,
compress_force = false;
break;
case Opt_ssd:
- btrfs_set_and_info(root, SSD,
+ btrfs_set_and_info(info, SSD,
"use ssd allocation scheme");
break;
case Opt_ssd_spread:
- btrfs_set_and_info(root, SSD_SPREAD,
+ btrfs_set_and_info(info, SSD_SPREAD,
"use spread ssd allocation scheme");
btrfs_set_opt(info->mount_opt, SSD);
break;
case Opt_nossd:
- btrfs_set_and_info(root, NOSSD,
+ btrfs_set_and_info(info, NOSSD,
"not using ssd allocation scheme");
btrfs_clear_opt(info->mount_opt, SSD);
break;
case Opt_barrier:
- btrfs_clear_and_info(root, NOBARRIER,
+ btrfs_clear_and_info(info, NOBARRIER,
"turning on barriers");
break;
case Opt_nobarrier:
- btrfs_set_and_info(root, NOBARRIER,
+ btrfs_set_and_info(info, NOBARRIER,
"turning off barriers");
break;
case Opt_thread_pool:
@@ -604,24 +629,24 @@ int btrfs_parse_options(struct btrfs_root *root, char *options,
root->fs_info->sb->s_flags &= ~MS_POSIXACL;
break;
case Opt_notreelog:
- btrfs_set_and_info(root, NOTREELOG,
+ btrfs_set_and_info(info, NOTREELOG,
"disabling tree log");
break;
case Opt_treelog:
- btrfs_clear_and_info(root, NOTREELOG,
+ btrfs_clear_and_info(info, NOTREELOG,
"enabling tree log");
break;
case Opt_norecovery:
case Opt_nologreplay:
- btrfs_set_and_info(root, NOLOGREPLAY,
+ btrfs_set_and_info(info, NOLOGREPLAY,
"disabling log replay at mount time");
break;
case Opt_flushoncommit:
- btrfs_set_and_info(root, FLUSHONCOMMIT,
+ btrfs_set_and_info(info, FLUSHONCOMMIT,
"turning on flush-on-commit");
break;
case Opt_noflushoncommit:
- btrfs_clear_and_info(root, FLUSHONCOMMIT,
+ btrfs_clear_and_info(info, FLUSHONCOMMIT,
"turning off flush-on-commit");
break;
case Opt_ratio:
@@ -638,11 +663,11 @@ int btrfs_parse_options(struct btrfs_root *root, char *options,
}
break;
case Opt_discard:
- btrfs_set_and_info(root, DISCARD,
+ btrfs_set_and_info(info, DISCARD,
"turning on discard");
break;
case Opt_nodiscard:
- btrfs_clear_and_info(root, DISCARD,
+ btrfs_clear_and_info(info, DISCARD,
"turning off discard");
break;
case Opt_space_cache:
@@ -651,12 +676,13 @@ int btrfs_parse_options(struct btrfs_root *root, char *options,
strcmp(args[0].from, "v1") == 0) {
btrfs_clear_opt(root->fs_info->mount_opt,
FREE_SPACE_TREE);
- btrfs_set_and_info(root, SPACE_CACHE,
+ btrfs_set_and_info(info, SPACE_CACHE,
"enabling disk space caching");
} else if (strcmp(args[0].from, "v2") == 0) {
btrfs_clear_opt(root->fs_info->mount_opt,
SPACE_CACHE);
- btrfs_set_and_info(root, FREE_SPACE_TREE,
+ btrfs_set_and_info(info,
+ FREE_SPACE_TREE,
"enabling free space tree");
} else {
ret = -EINVAL;
@@ -667,12 +693,14 @@ int btrfs_parse_options(struct btrfs_root *root, char *options,
btrfs_set_opt(info->mount_opt, RESCAN_UUID_TREE);
break;
case Opt_no_space_cache:
- if (btrfs_test_opt(root, SPACE_CACHE)) {
- btrfs_clear_and_info(root, SPACE_CACHE,
+ if (btrfs_test_opt(info, SPACE_CACHE)) {
+ btrfs_clear_and_info(info,
+ SPACE_CACHE,
"disabling disk space caching");
}
- if (btrfs_test_opt(root, FREE_SPACE_TREE)) {
- btrfs_clear_and_info(root, FREE_SPACE_TREE,
+ if (btrfs_test_opt(info, FREE_SPACE_TREE)) {
+ btrfs_clear_and_info(info,
+ FREE_SPACE_TREE,
"disabling free space tree");
}
break;
@@ -685,7 +713,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options,
"disabling inode map caching");
break;
case Opt_clear_cache:
- btrfs_set_and_info(root, CLEAR_CACHE,
+ btrfs_set_and_info(info, CLEAR_CACHE,
"force clearing of disk cache");
break;
case Opt_user_subvol_rm_allowed:
@@ -698,11 +726,11 @@ int btrfs_parse_options(struct btrfs_root *root, char *options,
btrfs_clear_opt(info->mount_opt, ENOSPC_DEBUG);
break;
case Opt_defrag:
- btrfs_set_and_info(root, AUTO_DEFRAG,
+ btrfs_set_and_info(info, AUTO_DEFRAG,
"enabling auto defrag");
break;
case Opt_nodefrag:
- btrfs_clear_and_info(root, AUTO_DEFRAG,
+ btrfs_clear_and_info(info, AUTO_DEFRAG,
"disabling auto defrag");
break;
case Opt_recovery:
@@ -810,22 +838,22 @@ check:
/*
* Extra check for current option against current flag
*/
- if (btrfs_test_opt(root, NOLOGREPLAY) && !(new_flags & MS_RDONLY)) {
+ if (btrfs_test_opt(info, NOLOGREPLAY) && !(new_flags & MS_RDONLY)) {
btrfs_err(root->fs_info,
"nologreplay must be used with ro mount option");
ret = -EINVAL;
}
out:
if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE) &&
- !btrfs_test_opt(root, FREE_SPACE_TREE) &&
- !btrfs_test_opt(root, CLEAR_CACHE)) {
+ !btrfs_test_opt(info, FREE_SPACE_TREE) &&
+ !btrfs_test_opt(info, CLEAR_CACHE)) {
btrfs_err(root->fs_info, "cannot disable free space tree");
ret = -EINVAL;
}
- if (!ret && btrfs_test_opt(root, SPACE_CACHE))
+ if (!ret && btrfs_test_opt(info, SPACE_CACHE))
btrfs_info(root->fs_info, "disk space caching is enabled");
- if (!ret && btrfs_test_opt(root, FREE_SPACE_TREE))
+ if (!ret && btrfs_test_opt(info, FREE_SPACE_TREE))
btrfs_info(root->fs_info, "using free space tree");
kfree(orig);
return ret;
@@ -1149,7 +1177,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
struct btrfs_fs_info *fs_info = btrfs_sb(sb);
struct btrfs_root *root = fs_info->tree_root;
- trace_btrfs_sync_fs(wait);
+ trace_btrfs_sync_fs(fs_info, wait);
if (!wait) {
filemap_flush(fs_info->btree_inode->i_mapping);
@@ -1192,13 +1220,13 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
struct btrfs_root *root = info->tree_root;
char *compress_type;
- if (btrfs_test_opt(root, DEGRADED))
+ if (btrfs_test_opt(info, DEGRADED))
seq_puts(seq, ",degraded");
- if (btrfs_test_opt(root, NODATASUM))
+ if (btrfs_test_opt(info, NODATASUM))
seq_puts(seq, ",nodatasum");
- if (btrfs_test_opt(root, NODATACOW))
+ if (btrfs_test_opt(info, NODATACOW))
seq_puts(seq, ",nodatacow");
- if (btrfs_test_opt(root, NOBARRIER))
+ if (btrfs_test_opt(info, NOBARRIER))
seq_puts(seq, ",nobarrier");
if (info->max_inline != BTRFS_DEFAULT_MAX_INLINE)
seq_printf(seq, ",max_inline=%llu", info->max_inline);
@@ -1207,56 +1235,56 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
if (info->thread_pool_size != min_t(unsigned long,
num_online_cpus() + 2, 8))
seq_printf(seq, ",thread_pool=%d", info->thread_pool_size);
- if (btrfs_test_opt(root, COMPRESS)) {
+ if (btrfs_test_opt(info, COMPRESS)) {
if (info->compress_type == BTRFS_COMPRESS_ZLIB)
compress_type = "zlib";
else
compress_type = "lzo";
- if (btrfs_test_opt(root, FORCE_COMPRESS))
+ if (btrfs_test_opt(info, FORCE_COMPRESS))
seq_printf(seq, ",compress-force=%s", compress_type);
else
seq_printf(seq, ",compress=%s", compress_type);
}
- if (btrfs_test_opt(root, NOSSD))
+ if (btrfs_test_opt(info, NOSSD))
seq_puts(seq, ",nossd");
- if (btrfs_test_opt(root, SSD_SPREAD))
+ if (btrfs_test_opt(info, SSD_SPREAD))
seq_puts(seq, ",ssd_spread");
- else if (btrfs_test_opt(root, SSD))
+ else if (btrfs_test_opt(info, SSD))
seq_puts(seq, ",ssd");
- if (btrfs_test_opt(root, NOTREELOG))
+ if (btrfs_test_opt(info, NOTREELOG))
seq_puts(seq, ",notreelog");
- if (btrfs_test_opt(root, NOLOGREPLAY))
+ if (btrfs_test_opt(info, NOLOGREPLAY))
seq_puts(seq, ",nologreplay");
- if (btrfs_test_opt(root, FLUSHONCOMMIT))
+ if (btrfs_test_opt(info, FLUSHONCOMMIT))
seq_puts(seq, ",flushoncommit");
- if (btrfs_test_opt(root, DISCARD))
+ if (btrfs_test_opt(info, DISCARD))
seq_puts(seq, ",discard");
if (!(root->fs_info->sb->s_flags & MS_POSIXACL))
seq_puts(seq, ",noacl");
- if (btrfs_test_opt(root, SPACE_CACHE))
+ if (btrfs_test_opt(info, SPACE_CACHE))
seq_puts(seq, ",space_cache");
- else if (btrfs_test_opt(root, FREE_SPACE_TREE))
+ else if (btrfs_test_opt(info, FREE_SPACE_TREE))
seq_puts(seq, ",space_cache=v2");
else
seq_puts(seq, ",nospace_cache");
- if (btrfs_test_opt(root, RESCAN_UUID_TREE))
+ if (btrfs_test_opt(info, RESCAN_UUID_TREE))
seq_puts(seq, ",rescan_uuid_tree");
- if (btrfs_test_opt(root, CLEAR_CACHE))
+ if (btrfs_test_opt(info, CLEAR_CACHE))
seq_puts(seq, ",clear_cache");
- if (btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED))
+ if (btrfs_test_opt(info, USER_SUBVOL_RM_ALLOWED))
seq_puts(seq, ",user_subvol_rm_allowed");
- if (btrfs_test_opt(root, ENOSPC_DEBUG))
+ if (btrfs_test_opt(info, ENOSPC_DEBUG))
seq_puts(seq, ",enospc_debug");
- if (btrfs_test_opt(root, AUTO_DEFRAG))
+ if (btrfs_test_opt(info, AUTO_DEFRAG))
seq_puts(seq, ",autodefrag");
- if (btrfs_test_opt(root, INODE_MAP_CACHE))
+ if (btrfs_test_opt(info, INODE_MAP_CACHE))
seq_puts(seq, ",inode_cache");
- if (btrfs_test_opt(root, SKIP_BALANCE))
+ if (btrfs_test_opt(info, SKIP_BALANCE))
seq_puts(seq, ",skip_balance");
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
- if (btrfs_test_opt(root, CHECK_INTEGRITY_INCLUDING_EXTENT_DATA))
+ if (btrfs_test_opt(info, CHECK_INTEGRITY_INCLUDING_EXTENT_DATA))
seq_puts(seq, ",check_int_data");
- else if (btrfs_test_opt(root, CHECK_INTEGRITY))
+ else if (btrfs_test_opt(info, CHECK_INTEGRITY))
seq_puts(seq, ",check_int");
if (info->check_integrity_print_mask)
seq_printf(seq, ",check_int_print_mask=%d",
@@ -1265,14 +1293,14 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
if (info->metadata_ratio)
seq_printf(seq, ",metadata_ratio=%d",
info->metadata_ratio);
- if (btrfs_test_opt(root, PANIC_ON_FATAL_ERROR))
+ if (btrfs_test_opt(info, PANIC_ON_FATAL_ERROR))
seq_puts(seq, ",fatal_errors=panic");
if (info->commit_interval != BTRFS_DEFAULT_COMMIT_INTERVAL)
seq_printf(seq, ",commit=%d", info->commit_interval);
#ifdef CONFIG_BTRFS_DEBUG
- if (btrfs_test_opt(root, FRAGMENT_DATA))
+ if (btrfs_test_opt(info, FRAGMENT_DATA))
seq_puts(seq, ",fragment=data");
- if (btrfs_test_opt(root, FRAGMENT_METADATA))
+ if (btrfs_test_opt(info, FRAGMENT_METADATA))
seq_puts(seq, ",fragment=metadata");
#endif
seq_printf(seq, ",subvolid=%llu",
@@ -2030,9 +2058,6 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
* chunk).
*
* If metadata is exhausted, f_bavail will be 0.
- *
- * FIXME: not accurate for mixed block groups, total and free/used are ok,
- * available appears slightly larger.
*/
static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
@@ -2216,6 +2241,13 @@ static int btrfs_freeze(struct super_block *sb)
struct btrfs_trans_handle *trans;
struct btrfs_root *root = btrfs_sb(sb)->tree_root;
+ root->fs_info->fs_frozen = 1;
+ /*
+ * We don't need a barrier here, we'll wait for any transaction that
+ * could be in progress on other threads (and do delayed iputs that
+ * we want to avoid on a frozen filesystem), or do the commit
+ * ourselves.
+ */
trans = btrfs_attach_transaction_barrier(root);
if (IS_ERR(trans)) {
/* no transaction, don't bother */
@@ -2226,6 +2258,14 @@ static int btrfs_freeze(struct super_block *sb)
return btrfs_commit_transaction(trans, root);
}
+static int btrfs_unfreeze(struct super_block *sb)
+{
+ struct btrfs_root *root = btrfs_sb(sb)->tree_root;
+
+ root->fs_info->fs_frozen = 0;
+ return 0;
+}
+
static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
{
struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb);
@@ -2274,6 +2314,7 @@ static const struct super_operations btrfs_super_ops = {
.statfs = btrfs_statfs,
.remount_fs = btrfs_remount,
.freeze_fs = btrfs_freeze,
+ .unfreeze_fs = btrfs_unfreeze,
};
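
The new fs_frozen flag is set in ->freeze_fs and cleared in ->unfreeze_fs; the transaction.c hunk further down checks it so a commit on a frozen filesystem skips delayed iputs that would deadlock against SB_FREEZE_FS. A single-threaded sketch of that handshake (the kernel relies on transaction ordering rather than this simplified flag):

/* Sketch of the fs_frozen handshake: freeze sets a flag, commit skips
 * deferred cleanup while it is set, unfreeze clears it. */
#include <stdio.h>

struct fs { int frozen; int pending_iputs; };

static void run_delayed_iputs(struct fs *fs)
{
	printf("running %d delayed iputs\n", fs->pending_iputs);
	fs->pending_iputs = 0;
}

static void commit_transaction(struct fs *fs)
{
	printf("transaction committed\n");
	if (!fs->frozen)		/* avoid deadlock vs. SB_FREEZE_FS */
		run_delayed_iputs(fs);
}

int main(void)
{
	struct fs fs = { 0, 2 };

	fs.frozen = 1;			/* ->freeze_fs */
	commit_transaction(&fs);	/* iputs deferred */
	fs.frozen = 0;			/* ->unfreeze_fs */
	commit_transaction(&fs);	/* now they run */
	return 0;
}
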
static const struct file_operations btrfs_ctl_fops = {
@@ -2319,49 +2360,6 @@ static void btrfs_print_mod_info(void)
btrfs_crc32c_impl());
}
-static int btrfs_run_sanity_tests(void)
-{
- int ret, i;
- u32 sectorsize, nodesize;
- u32 test_sectorsize[] = {
- PAGE_SIZE,
- };
- ret = btrfs_init_test_fs();
- if (ret)
- return ret;
- for (i = 0; i < ARRAY_SIZE(test_sectorsize); i++) {
- sectorsize = test_sectorsize[i];
- for (nodesize = sectorsize;
- nodesize <= BTRFS_MAX_METADATA_BLOCKSIZE;
- nodesize <<= 1) {
- pr_info("BTRFS: selftest: sectorsize: %u nodesize: %u\n",
- sectorsize, nodesize);
- ret = btrfs_test_free_space_cache(sectorsize, nodesize);
- if (ret)
- goto out;
- ret = btrfs_test_extent_buffer_operations(sectorsize,
- nodesize);
- if (ret)
- goto out;
- ret = btrfs_test_extent_io(sectorsize, nodesize);
- if (ret)
- goto out;
- ret = btrfs_test_inodes(sectorsize, nodesize);
- if (ret)
- goto out;
- ret = btrfs_test_qgroups(sectorsize, nodesize);
- if (ret)
- goto out;
- ret = btrfs_test_free_space_tree(sectorsize, nodesize);
- if (ret)
- goto out;
- }
- }
-out:
- btrfs_destroy_test_fs();
- return ret;
-}
-
static int __init init_btrfs_fs(void)
{
int err;
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 4879656bd..c6569905d 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -326,6 +326,7 @@ SPACE_INFO_ATTR(bytes_used);
SPACE_INFO_ATTR(bytes_pinned);
SPACE_INFO_ATTR(bytes_reserved);
SPACE_INFO_ATTR(bytes_may_use);
+SPACE_INFO_ATTR(bytes_readonly);
SPACE_INFO_ATTR(disk_used);
SPACE_INFO_ATTR(disk_total);
BTRFS_ATTR(total_bytes_pinned, btrfs_space_info_show_total_bytes_pinned);
@@ -337,6 +338,7 @@ static struct attribute *space_info_attrs[] = {
BTRFS_ATTR_PTR(bytes_pinned),
BTRFS_ATTR_PTR(bytes_reserved),
BTRFS_ATTR_PTR(bytes_may_use),
+ BTRFS_ATTR_PTR(bytes_readonly),
BTRFS_ATTR_PTR(disk_used),
BTRFS_ATTR_PTR(disk_total),
BTRFS_ATTR_PTR(total_bytes_pinned),
diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c
index 02223f3f7..bf62ad919 100644
--- a/fs/btrfs/tests/btrfs-tests.c
+++ b/fs/btrfs/tests/btrfs-tests.c
@@ -54,7 +54,7 @@ struct inode *btrfs_new_test_inode(void)
return new_inode(test_mnt->mnt_sb);
}
-int btrfs_init_test_fs(void)
+static int btrfs_init_test_fs(void)
{
int ret;
@@ -73,7 +73,7 @@ int btrfs_init_test_fs(void)
return 0;
}
-void btrfs_destroy_test_fs(void)
+static void btrfs_destroy_test_fs(void)
{
kern_unmount(test_mnt);
unregister_filesystem(&test_type);
@@ -128,14 +128,27 @@ struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(void)
extent_io_tree_init(&fs_info->freed_extents[0], NULL);
extent_io_tree_init(&fs_info->freed_extents[1], NULL);
fs_info->pinned_extents = &fs_info->freed_extents[0];
+ set_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state);
+
+ test_mnt->mnt_sb->s_fs_info = fs_info;
+
return fs_info;
}
-static void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info)
+void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info)
{
struct radix_tree_iter iter;
void **slot;
+ if (!fs_info)
+ return;
+
+ if (WARN_ON(!test_bit(BTRFS_FS_STATE_DUMMY_FS_INFO,
+ &fs_info->fs_state)))
+ return;
+
+ test_mnt->mnt_sb->s_fs_info = NULL;
+
spin_lock(&fs_info->buffer_lock);
radix_tree_for_each_slot(slot, &fs_info->buffer_radix, &iter, 0) {
struct extent_buffer *eb;
@@ -167,10 +180,11 @@ void btrfs_free_dummy_root(struct btrfs_root *root)
{
if (!root)
return;
+ /* Will be freed by btrfs_free_fs_roots */
+ if (WARN_ON(test_bit(BTRFS_ROOT_IN_RADIX, &root->state)))
+ return;
if (root->node)
free_extent_buffer(root->node);
- if (root->fs_info)
- btrfs_free_dummy_fs_info(root->fs_info);
kfree(root);
}
@@ -220,3 +234,46 @@ void btrfs_init_dummy_trans(struct btrfs_trans_handle *trans)
INIT_LIST_HEAD(&trans->qgroup_ref_list);
trans->type = __TRANS_DUMMY;
}
+
+int btrfs_run_sanity_tests(void)
+{
+ int ret, i;
+ u32 sectorsize, nodesize;
+ u32 test_sectorsize[] = {
+ PAGE_SIZE,
+ };
+ ret = btrfs_init_test_fs();
+ if (ret)
+ return ret;
+ for (i = 0; i < ARRAY_SIZE(test_sectorsize); i++) {
+ sectorsize = test_sectorsize[i];
+ for (nodesize = sectorsize;
+ nodesize <= BTRFS_MAX_METADATA_BLOCKSIZE;
+ nodesize <<= 1) {
+ pr_info("BTRFS: selftest: sectorsize: %u nodesize: %u\n",
+ sectorsize, nodesize);
+ ret = btrfs_test_free_space_cache(sectorsize, nodesize);
+ if (ret)
+ goto out;
+ ret = btrfs_test_extent_buffer_operations(sectorsize,
+ nodesize);
+ if (ret)
+ goto out;
+ ret = btrfs_test_extent_io(sectorsize, nodesize);
+ if (ret)
+ goto out;
+ ret = btrfs_test_inodes(sectorsize, nodesize);
+ if (ret)
+ goto out;
+ ret = btrfs_test_qgroups(sectorsize, nodesize);
+ if (ret)
+ goto out;
+ ret = btrfs_test_free_space_tree(sectorsize, nodesize);
+ if (ret)
+ goto out;
+ }
+ }
+out:
+ btrfs_destroy_test_fs();
+ return ret;
+}
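
The relocated self-test driver sweeps every (sectorsize, nodesize) pair, doubling nodesize from sectorsize up to BTRFS_MAX_METADATA_BLOCKSIZE. A tiny sketch of the matrix it walks, assuming a 4K page size and a 64K metadata cap (both assumptions; they vary by architecture and config):

/* Prints the (sectorsize, nodesize) pairs the self-tests iterate. */
#include <stdio.h>

#define PAGE_SIZE_ASSUMED		4096u
#define MAX_METADATA_BLOCKSIZE_ASSUMED	65536u

int main(void)
{
	unsigned int sectorsize = PAGE_SIZE_ASSUMED;
	unsigned int nodesize;

	for (nodesize = sectorsize;
	     nodesize <= MAX_METADATA_BLOCKSIZE_ASSUMED;
	     nodesize <<= 1)
		printf("sectorsize: %u nodesize: %u\n",
		       sectorsize, nodesize);
	return 0;
}
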
diff --git a/fs/btrfs/tests/btrfs-tests.h b/fs/btrfs/tests/btrfs-tests.h
index 66fb6b701..b17ffbe8f 100644
--- a/fs/btrfs/tests/btrfs-tests.h
+++ b/fs/btrfs/tests/btrfs-tests.h
@@ -20,57 +20,29 @@
#define __BTRFS_TESTS
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+int btrfs_run_sanity_tests(void);
#define test_msg(fmt, ...) pr_info("BTRFS: selftest: " fmt, ##__VA_ARGS__)
struct btrfs_root;
struct btrfs_trans_handle;
-int btrfs_test_free_space_cache(u32 sectorsize, u32 nodesize);
int btrfs_test_extent_buffer_operations(u32 sectorsize, u32 nodesize);
+int btrfs_test_free_space_cache(u32 sectorsize, u32 nodesize);
int btrfs_test_extent_io(u32 sectorsize, u32 nodesize);
int btrfs_test_inodes(u32 sectorsize, u32 nodesize);
int btrfs_test_qgroups(u32 sectorsize, u32 nodesize);
int btrfs_test_free_space_tree(u32 sectorsize, u32 nodesize);
-int btrfs_init_test_fs(void);
-void btrfs_destroy_test_fs(void);
struct inode *btrfs_new_test_inode(void);
struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(void);
+void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info);
void btrfs_free_dummy_root(struct btrfs_root *root);
struct btrfs_block_group_cache *
btrfs_alloc_dummy_block_group(unsigned long length, u32 sectorsize);
void btrfs_free_dummy_block_group(struct btrfs_block_group_cache *cache);
void btrfs_init_dummy_trans(struct btrfs_trans_handle *trans);
#else
-static inline int btrfs_test_free_space_cache(u32 sectorsize, u32 nodesize)
-{
- return 0;
-}
-static inline int btrfs_test_extent_buffer_operations(u32 sectorsize,
- u32 nodesize)
-{
- return 0;
-}
-static inline int btrfs_init_test_fs(void)
-{
- return 0;
-}
-static inline void btrfs_destroy_test_fs(void)
-{
-}
-static inline int btrfs_test_extent_io(u32 sectorsize, u32 nodesize)
-{
- return 0;
-}
-static inline int btrfs_test_inodes(u32 sectorsize, u32 nodesize)
-{
- return 0;
-}
-static inline int btrfs_test_qgroups(u32 sectorsize, u32 nodesize)
-{
- return 0;
-}
-static inline int btrfs_test_free_space_tree(u32 sectorsize, u32 nodesize)
+static inline int btrfs_run_sanity_tests(void)
{
return 0;
}
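
The header now exports a single entry point with a static inline stub when CONFIG_BTRFS_FS_RUN_SANITY_TESTS is off, so init_btrfs_fs() can call it unconditionally. A sketch of that config-gated stub idiom; DEMO_RUN_TESTS and run_sanity_tests() are illustrative stand-ins:

/* Single-entry-point stub idiom: callers always call the function;
 * a disabled build compiles it down to "return 0". */
#include <stdio.h>

#ifdef DEMO_RUN_TESTS
static int run_sanity_tests(void)
{
	printf("running self-tests\n");
	return 0;	/* nonzero would fail module init */
}
#else
static inline int run_sanity_tests(void)
{
	return 0;	/* tests compiled out */
}
#endif

int main(void)
{
	return run_sanity_tests();	/* unconditional call site */
}
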
diff --git a/fs/btrfs/tests/extent-buffer-tests.c b/fs/btrfs/tests/extent-buffer-tests.c
index 4f8cbd1ec..199569174 100644
--- a/fs/btrfs/tests/extent-buffer-tests.c
+++ b/fs/btrfs/tests/extent-buffer-tests.c
@@ -24,8 +24,9 @@
static int test_btrfs_split_item(u32 sectorsize, u32 nodesize)
{
- struct btrfs_path *path;
- struct btrfs_root *root;
+ struct btrfs_fs_info *fs_info;
+ struct btrfs_path *path = NULL;
+ struct btrfs_root *root = NULL;
struct extent_buffer *eb;
struct btrfs_item *item;
char *value = "mary had a little lamb";
@@ -40,17 +41,24 @@ static int test_btrfs_split_item(u32 sectorsize, u32 nodesize)
test_msg("Running btrfs_split_item tests\n");
- root = btrfs_alloc_dummy_root(sectorsize, nodesize);
+ fs_info = btrfs_alloc_dummy_fs_info();
+ if (!fs_info) {
+ test_msg("Could not allocate fs_info\n");
+ return -ENOMEM;
+ }
+
+ root = btrfs_alloc_dummy_root(fs_info, sectorsize, nodesize);
if (IS_ERR(root)) {
test_msg("Could not allocate root\n");
- return PTR_ERR(root);
+ ret = PTR_ERR(root);
+ goto out;
}
path = btrfs_alloc_path();
if (!path) {
test_msg("Could not allocate path\n");
- kfree(root);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto out;
}
path->nodes[0] = eb = alloc_dummy_extent_buffer(NULL, nodesize,
@@ -219,7 +227,8 @@ static int test_btrfs_split_item(u32 sectorsize, u32 nodesize)
}
out:
btrfs_free_path(path);
- kfree(root);
+ btrfs_free_dummy_root(root);
+ btrfs_free_dummy_fs_info(fs_info);
return ret;
}
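
The converted tests all follow the same shape: initialize pointers to NULL, allocate fs_info before root, and unwind everything through one out: label. A minimal sketch of that goto-cleanup idiom, with invented types:

/* Goto-cleanup idiom: allocate in order, initialize pointers to NULL,
 * free everything through one label so early failures and success
 * share the same unwind path. */
#include <stdlib.h>

struct fs_info { int dummy; };
struct root { struct fs_info *fs_info; };

int run_test(void)
{
	struct fs_info *fs_info = NULL;
	struct root *root = NULL;
	int ret = -1;

	fs_info = malloc(sizeof(*fs_info));
	if (!fs_info)
		goto out;		/* free(NULL) below is a no-op */
	root = malloc(sizeof(*root));
	if (!root)
		goto out;
	root->fs_info = fs_info;

	ret = 0;			/* ... test body would run here ... */
out:
	free(root);			/* reverse order of allocation */
	free(fs_info);
	return ret;
}

int main(void)
{
	return run_test();
}
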
diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c
index 3956bb2ff..3221c8dee 100644
--- a/fs/btrfs/tests/free-space-tests.c
+++ b/fs/btrfs/tests/free-space-tests.c
@@ -837,6 +837,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
int btrfs_test_free_space_cache(u32 sectorsize, u32 nodesize)
{
+ struct btrfs_fs_info *fs_info;
struct btrfs_block_group_cache *cache;
struct btrfs_root *root = NULL;
int ret = -ENOMEM;
@@ -855,15 +856,17 @@ int btrfs_test_free_space_cache(u32 sectorsize, u32 nodesize)
return 0;
}
- root = btrfs_alloc_dummy_root(sectorsize, nodesize);
- if (IS_ERR(root)) {
- ret = PTR_ERR(root);
+ fs_info = btrfs_alloc_dummy_fs_info();
+ if (!fs_info) {
+ ret = -ENOMEM;
goto out;
}
- root->fs_info = btrfs_alloc_dummy_fs_info();
- if (!root->fs_info)
+ root = btrfs_alloc_dummy_root(fs_info, sectorsize, nodesize);
+ if (IS_ERR(root)) {
+ ret = PTR_ERR(root);
goto out;
+ }
root->fs_info->extent_root = root;
cache->fs_info = root->fs_info;
@@ -882,6 +885,7 @@ int btrfs_test_free_space_cache(u32 sectorsize, u32 nodesize)
out:
btrfs_free_dummy_block_group(cache);
btrfs_free_dummy_root(root);
+ btrfs_free_dummy_fs_info(fs_info);
test_msg("Free space cache tests finished\n");
return ret;
}
diff --git a/fs/btrfs/tests/free-space-tree-tests.c b/fs/btrfs/tests/free-space-tree-tests.c
index aac507085..7508d3b42 100644
--- a/fs/btrfs/tests/free-space-tree-tests.c
+++ b/fs/btrfs/tests/free-space-tree-tests.c
@@ -443,23 +443,24 @@ typedef int (*test_func_t)(struct btrfs_trans_handle *,
static int run_test(test_func_t test_func, int bitmaps,
u32 sectorsize, u32 nodesize)
{
+ struct btrfs_fs_info *fs_info;
struct btrfs_root *root = NULL;
struct btrfs_block_group_cache *cache = NULL;
struct btrfs_trans_handle trans;
struct btrfs_path *path = NULL;
int ret;
- root = btrfs_alloc_dummy_root(sectorsize, nodesize);
- if (IS_ERR(root)) {
- test_msg("Couldn't allocate dummy root\n");
- ret = PTR_ERR(root);
+ fs_info = btrfs_alloc_dummy_fs_info();
+ if (!fs_info) {
+ test_msg("Couldn't allocate dummy fs info\n");
+ ret = -ENOMEM;
goto out;
}
- root->fs_info = btrfs_alloc_dummy_fs_info();
- if (!root->fs_info) {
- test_msg("Couldn't allocate dummy fs info\n");
- ret = -ENOMEM;
+ root = btrfs_alloc_dummy_root(fs_info, sectorsize, nodesize);
+ if (IS_ERR(root)) {
+ test_msg("Couldn't allocate dummy root\n");
+ ret = PTR_ERR(root);
goto out;
}
@@ -534,6 +535,7 @@ out:
btrfs_free_path(path);
btrfs_free_dummy_block_group(cache);
btrfs_free_dummy_root(root);
+ btrfs_free_dummy_fs_info(fs_info);
return ret;
}
diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c
index 29648c0a3..9f72aeda9 100644
--- a/fs/btrfs/tests/inode-tests.c
+++ b/fs/btrfs/tests/inode-tests.c
@@ -230,6 +230,7 @@ static unsigned long vacancy_only = 0;
static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
{
+ struct btrfs_fs_info *fs_info = NULL;
struct inode *inode = NULL;
struct btrfs_root *root = NULL;
struct extent_map *em = NULL;
@@ -248,19 +249,15 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID;
BTRFS_I(inode)->location.offset = 0;
- root = btrfs_alloc_dummy_root(sectorsize, nodesize);
- if (IS_ERR(root)) {
- test_msg("Couldn't allocate root\n");
+ fs_info = btrfs_alloc_dummy_fs_info();
+ if (!fs_info) {
+ test_msg("Couldn't allocate dummy fs info\n");
goto out;
}
- /*
- * We do this since btrfs_get_extent wants to assign em->bdev to
- * root->fs_info->fs_devices->latest_bdev.
- */
- root->fs_info = btrfs_alloc_dummy_fs_info();
- if (!root->fs_info) {
- test_msg("Couldn't allocate dummy fs info\n");
+ root = btrfs_alloc_dummy_root(fs_info, sectorsize, nodesize);
+ if (IS_ERR(root)) {
+ test_msg("Couldn't allocate root\n");
goto out;
}
@@ -835,11 +832,13 @@ out:
free_extent_map(em);
iput(inode);
btrfs_free_dummy_root(root);
+ btrfs_free_dummy_fs_info(fs_info);
return ret;
}
static int test_hole_first(u32 sectorsize, u32 nodesize)
{
+ struct btrfs_fs_info *fs_info = NULL;
struct inode *inode = NULL;
struct btrfs_root *root = NULL;
struct extent_map *em = NULL;
@@ -855,15 +854,15 @@ static int test_hole_first(u32 sectorsize, u32 nodesize)
BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID;
BTRFS_I(inode)->location.offset = 0;
- root = btrfs_alloc_dummy_root(sectorsize, nodesize);
- if (IS_ERR(root)) {
- test_msg("Couldn't allocate root\n");
+ fs_info = btrfs_alloc_dummy_fs_info();
+ if (!fs_info) {
+ test_msg("Couldn't allocate dummy fs info\n");
goto out;
}
- root->fs_info = btrfs_alloc_dummy_fs_info();
- if (!root->fs_info) {
- test_msg("Couldn't allocate dummy fs info\n");
+ root = btrfs_alloc_dummy_root(fs_info, sectorsize, nodesize);
+ if (IS_ERR(root)) {
+ test_msg("Couldn't allocate root\n");
goto out;
}
@@ -934,11 +933,13 @@ out:
free_extent_map(em);
iput(inode);
btrfs_free_dummy_root(root);
+ btrfs_free_dummy_fs_info(fs_info);
return ret;
}
static int test_extent_accounting(u32 sectorsize, u32 nodesize)
{
+ struct btrfs_fs_info *fs_info = NULL;
struct inode *inode = NULL;
struct btrfs_root *root = NULL;
int ret = -ENOMEM;
@@ -949,15 +950,15 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
return ret;
}
- root = btrfs_alloc_dummy_root(sectorsize, nodesize);
- if (IS_ERR(root)) {
- test_msg("Couldn't allocate root\n");
+ fs_info = btrfs_alloc_dummy_fs_info();
+ if (!fs_info) {
+ test_msg("Couldn't allocate dummy fs info\n");
goto out;
}
- root->fs_info = btrfs_alloc_dummy_fs_info();
- if (!root->fs_info) {
- test_msg("Couldn't allocate dummy fs info\n");
+ root = btrfs_alloc_dummy_root(fs_info, sectorsize, nodesize);
+ if (IS_ERR(root)) {
+ test_msg("Couldn't allocate root\n");
goto out;
}
@@ -1132,6 +1133,7 @@ out:
NULL, GFP_KERNEL);
iput(inode);
btrfs_free_dummy_root(root);
+ btrfs_free_dummy_fs_info(fs_info);
return ret;
}
diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c
index 57a12c0d6..4407fef7c 100644
--- a/fs/btrfs/tests/qgroup-tests.c
+++ b/fs/btrfs/tests/qgroup-tests.c
@@ -453,22 +453,24 @@ static int test_multiple_refs(struct btrfs_root *root,
int btrfs_test_qgroups(u32 sectorsize, u32 nodesize)
{
+ struct btrfs_fs_info *fs_info = NULL;
struct btrfs_root *root;
struct btrfs_root *tmp_root;
int ret = 0;
- root = btrfs_alloc_dummy_root(sectorsize, nodesize);
- if (IS_ERR(root)) {
- test_msg("Couldn't allocate root\n");
- return PTR_ERR(root);
+ fs_info = btrfs_alloc_dummy_fs_info();
+ if (!fs_info) {
+ test_msg("Couldn't allocate dummy fs info\n");
+ return -ENOMEM;
}
- root->fs_info = btrfs_alloc_dummy_fs_info();
- if (!root->fs_info) {
- test_msg("Couldn't allocate dummy fs info\n");
- ret = -ENOMEM;
+ root = btrfs_alloc_dummy_root(fs_info, sectorsize, nodesize);
+ if (IS_ERR(root)) {
+ test_msg("Couldn't allocate root\n");
+ ret = PTR_ERR(root);
goto out;
}
+
/* We are using this root as our extent root */
root->fs_info->extent_root = root;
@@ -495,7 +497,7 @@ int btrfs_test_qgroups(u32 sectorsize, u32 nodesize)
btrfs_set_header_nritems(root->node, 0);
root->alloc_bytenr += 2 * nodesize;
- tmp_root = btrfs_alloc_dummy_root(sectorsize, nodesize);
+ tmp_root = btrfs_alloc_dummy_root(fs_info, sectorsize, nodesize);
if (IS_ERR(tmp_root)) {
test_msg("Couldn't allocate a fs root\n");
ret = PTR_ERR(tmp_root);
@@ -510,7 +512,7 @@ int btrfs_test_qgroups(u32 sectorsize, u32 nodesize)
goto out;
}
- tmp_root = btrfs_alloc_dummy_root(sectorsize, nodesize);
+ tmp_root = btrfs_alloc_dummy_root(fs_info, sectorsize, nodesize);
if (IS_ERR(tmp_root)) {
test_msg("Couldn't allocate a fs root\n");
ret = PTR_ERR(tmp_root);
@@ -531,5 +533,6 @@ int btrfs_test_qgroups(u32 sectorsize, u32 nodesize)
ret = test_multiple_refs(root, sectorsize, nodesize);
out:
btrfs_free_dummy_root(root);
+ btrfs_free_dummy_fs_info(fs_info);
return ret;
}
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 948aa186b..95d41919d 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -561,6 +561,7 @@ again:
h->transaction = cur_trans;
h->root = root;
h->use_count = 1;
+ h->fs_info = root->fs_info;
h->type = type;
h->can_flush_pending_bgs = true;
@@ -1491,7 +1492,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
goto dir_item_existed;
} else if (IS_ERR(dir_item)) {
ret = PTR_ERR(dir_item);
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto fail;
}
btrfs_release_path(path);
@@ -1504,7 +1505,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
*/
ret = btrfs_run_delayed_items(trans, root);
if (ret) { /* Transaction aborted */
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto fail;
}
@@ -1543,7 +1544,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
if (ret) {
btrfs_tree_unlock(old);
free_extent_buffer(old);
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto fail;
}
@@ -1554,7 +1555,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
btrfs_tree_unlock(old);
free_extent_buffer(old);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto fail;
}
/* see comments in should_cow_block() */
@@ -1568,7 +1569,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
btrfs_tree_unlock(tmp);
free_extent_buffer(tmp);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto fail;
}
@@ -1580,7 +1581,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
btrfs_ino(parent_inode), index,
dentry->d_name.name, dentry->d_name.len);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto fail;
}
@@ -1588,19 +1589,19 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key);
if (IS_ERR(pending->snap)) {
ret = PTR_ERR(pending->snap);
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto fail;
}
ret = btrfs_reloc_post_snapshot(trans, pending);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto fail;
}
ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto fail;
}
@@ -1622,7 +1623,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
/* We have checked the name at the beginning, so it is impossible. */
BUG_ON(ret == -EEXIST || ret == -EOVERFLOW);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto fail;
}
@@ -1632,13 +1633,13 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
current_fs_time(parent_inode->i_sb);
ret = btrfs_update_inode_fallback(trans, parent_root, parent_inode);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto fail;
}
ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root, new_uuid.b,
BTRFS_UUID_KEY_SUBVOL, objectid);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto fail;
}
if (!btrfs_is_empty_uuid(new_root_item->received_uuid)) {
@@ -1647,14 +1648,14 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
BTRFS_UUID_KEY_RECEIVED_SUBVOL,
objectid);
if (ret && ret != -EEXIST) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto fail;
}
}
ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto fail;
}
@@ -1709,7 +1710,7 @@ static void update_super_roots(struct btrfs_root *root)
super->root = root_item->bytenr;
super->generation = root_item->generation;
super->root_level = root_item->level;
- if (btrfs_test_opt(root, SPACE_CACHE))
+ if (btrfs_test_opt(root->fs_info, SPACE_CACHE))
super->cache_generation = root_item->generation;
if (root->fs_info->update_uuid_tree_gen)
super->uuid_tree_generation = root_item->generation;
@@ -1850,7 +1851,7 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans,
WARN_ON(trans->use_count > 1);
- btrfs_abort_transaction(trans, root, err);
+ btrfs_abort_transaction(trans, err);
spin_lock(&root->fs_info->trans_lock);
@@ -1895,14 +1896,14 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans,
static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
{
- if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT))
+ if (btrfs_test_opt(fs_info, FLUSHONCOMMIT))
return btrfs_start_delalloc_roots(fs_info, 1, -1);
return 0;
}
static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info)
{
- if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT))
+ if (btrfs_test_opt(fs_info, FLUSHONCOMMIT))
btrfs_wait_ordered_roots(fs_info, -1, 0, (u64)-1);
}
@@ -2277,8 +2278,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
kmem_cache_free(btrfs_trans_handle_cachep, trans);
+ /*
+	 * If the fs has been frozen, we cannot handle delayed iputs;
+	 * otherwise we would deadlock on SB_FREEZE_FS.
+ */
if (current != root->fs_info->transaction_kthread &&
- current != root->fs_info->cleaner_kthread)
+ current != root->fs_info->cleaner_kthread &&
+ !root->fs_info->fs_frozen)
btrfs_run_delayed_iputs(root);
return ret;
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index c5abee4f0..efb122643 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -128,6 +128,7 @@ struct btrfs_trans_handle {
* Subvolume quota depends on this
*/
struct btrfs_root *root;
+ struct btrfs_fs_info *fs_info;
struct seq_list delayed_ref_elem;
struct list_head qgroup_ref_list;
struct list_head new_bgs;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 542379f8f..ef9c55bc7 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -27,6 +27,7 @@
#include "backref.h"
#include "hash.h"
#include "compression.h"
+#include "qgroup.h"
/* magic values for the inode_only field in btrfs_log_inode:
*
@@ -680,6 +681,21 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
ins.type = BTRFS_EXTENT_ITEM_KEY;
offset = key->offset - btrfs_file_extent_offset(eb, item);
+ /*
+	 * Manually record the dirty extent: we did a shallow copy of
+	 * the file extent item here and skipped the normal backref
+	 * update, modifying the extent tree all by ourselves. So the
+	 * dirty extent must be recorded for qgroup manually, as
+	 * ownership of the file extent changed from the log tree
+	 * (which doesn't affect qgroup) to the fs/file tree (which does).
+ */
+ ret = btrfs_qgroup_insert_dirty_extent(trans, root->fs_info,
+ btrfs_file_extent_disk_bytenr(eb, item),
+ btrfs_file_extent_disk_num_bytes(eb, item),
+ GFP_NOFS);
+ if (ret < 0)
+ goto out;
+
if (ins.objectid > 0) {
u64 csum_start;
u64 csum_end;
@@ -2757,7 +2773,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
while (1) {
int batch = atomic_read(&root->log_batch);
/* when we're on an ssd, just kick the log commit out */
- if (!btrfs_test_opt(root, SSD) &&
+ if (!btrfs_test_opt(root->fs_info, SSD) &&
test_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state)) {
mutex_unlock(&root->log_mutex);
schedule_timeout_uninterruptible(1);
@@ -2788,7 +2804,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark);
if (ret) {
blk_finish_plug(&plug);
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
btrfs_free_logged_extents(log, log_transid);
btrfs_set_log_full_commit(root->fs_info, trans);
mutex_unlock(&root->log_mutex);
@@ -2807,7 +2823,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
*/
mutex_unlock(&root->log_mutex);
- btrfs_init_log_ctx(&root_log_ctx);
+ btrfs_init_log_ctx(&root_log_ctx, NULL);
mutex_lock(&log_root_tree->log_mutex);
atomic_inc(&log_root_tree->log_batch);
@@ -2838,7 +2854,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
btrfs_set_log_full_commit(root->fs_info, trans);
if (ret != -ENOSPC) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
mutex_unlock(&log_root_tree->log_mutex);
goto out;
}
@@ -2899,7 +2915,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
blk_finish_plug(&plug);
if (ret) {
btrfs_set_log_full_commit(root->fs_info, trans);
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
btrfs_free_logged_extents(log, log_transid);
mutex_unlock(&log_root_tree->log_mutex);
goto out_wake_log_root;
@@ -2935,7 +2951,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
ret = write_ctree_super(trans, root->fs_info->tree_root, 1);
if (ret) {
btrfs_set_log_full_commit(root->fs_info, trans);
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out_wake_log_root;
}
@@ -2992,7 +3008,7 @@ static void free_log_tree(struct btrfs_trans_handle *trans,
ret = walk_log_tree(trans, log, &wc);
/* I don't think this can happen but just in case */
if (ret)
- btrfs_abort_transaction(trans, log, ret);
+ btrfs_abort_transaction(trans, ret);
while (1) {
ret = find_first_extent_bit(&log->dirty_log_pages,
@@ -3161,7 +3177,7 @@ out_unlock:
btrfs_set_log_full_commit(root->fs_info, trans);
ret = 0;
} else if (ret < 0)
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
btrfs_end_log_trans(root);
@@ -3194,7 +3210,7 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
btrfs_set_log_full_commit(root->fs_info, trans);
ret = 0;
} else if (ret < 0 && ret != -ENOENT)
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
btrfs_end_log_trans(root);
return ret;
@@ -4470,7 +4486,8 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,
static int btrfs_check_ref_name_override(struct extent_buffer *eb,
const int slot,
const struct btrfs_key *key,
- struct inode *inode)
+ struct inode *inode,
+ u64 *other_ino)
{
int ret;
struct btrfs_path *search_path;
@@ -4529,7 +4546,16 @@ static int btrfs_check_ref_name_override(struct extent_buffer *eb,
search_path, parent,
name, this_name_len, 0);
if (di && !IS_ERR(di)) {
- ret = 1;
+ struct btrfs_key di_key;
+
+ btrfs_dir_item_key_to_cpu(search_path->nodes[0],
+ di, &di_key);
+ if (di_key.type == BTRFS_INODE_ITEM_KEY) {
+ ret = 1;
+ *other_ino = di_key.objectid;
+ } else {
+ ret = -EAGAIN;
+ }
goto out;
} else if (IS_ERR(di)) {
ret = PTR_ERR(di);
@@ -4704,6 +4730,10 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
ins_nr = 0;
ret = btrfs_search_forward(root, &min_key,
path, trans->transid);
+ if (ret < 0) {
+ err = ret;
+ goto out_unlock;
+ }
if (ret != 0)
break;
again:
@@ -4719,16 +4749,72 @@ again:
if ((min_key.type == BTRFS_INODE_REF_KEY ||
min_key.type == BTRFS_INODE_EXTREF_KEY) &&
BTRFS_I(inode)->generation == trans->transid) {
+ u64 other_ino = 0;
+
ret = btrfs_check_ref_name_override(path->nodes[0],
path->slots[0],
- &min_key, inode);
+ &min_key, inode,
+ &other_ino);
if (ret < 0) {
err = ret;
goto out_unlock;
- } else if (ret > 0) {
- err = 1;
- btrfs_set_log_full_commit(root->fs_info, trans);
- goto out_unlock;
+ } else if (ret > 0 && ctx &&
+ other_ino != btrfs_ino(ctx->inode)) {
+ struct btrfs_key inode_key;
+ struct inode *other_inode;
+
+ if (ins_nr > 0) {
+ ins_nr++;
+ } else {
+ ins_nr = 1;
+ ins_start_slot = path->slots[0];
+ }
+ ret = copy_items(trans, inode, dst_path, path,
+ &last_extent, ins_start_slot,
+ ins_nr, inode_only,
+ logged_isize);
+ if (ret < 0) {
+ err = ret;
+ goto out_unlock;
+ }
+ ins_nr = 0;
+ btrfs_release_path(path);
+ inode_key.objectid = other_ino;
+ inode_key.type = BTRFS_INODE_ITEM_KEY;
+ inode_key.offset = 0;
+ other_inode = btrfs_iget(root->fs_info->sb,
+ &inode_key, root,
+ NULL);
+ /*
+ * If the other inode that had a conflicting dir
+ * entry was deleted in the current transaction,
+				 * we don't need to do more work nor fall back to
+ * a transaction commit.
+ */
+ if (IS_ERR(other_inode) &&
+ PTR_ERR(other_inode) == -ENOENT) {
+ goto next_key;
+ } else if (IS_ERR(other_inode)) {
+ err = PTR_ERR(other_inode);
+ goto out_unlock;
+ }
+ /*
+ * We are safe logging the other inode without
+ * acquiring its i_mutex as long as we log with
+ * the LOG_INODE_EXISTS mode. We're safe against
+ * concurrent renames of the other inode as well
+ * because during a rename we pin the log and
+ * update the log with the new name before we
+ * unpin it.
+ */
+ err = btrfs_log_inode(trans, root, other_inode,
+ LOG_INODE_EXISTS,
+ 0, LLONG_MAX, ctx);
+ iput(other_inode);
+ if (err)
+ goto out_unlock;
+ else
+ goto next_key;
}
}
@@ -4796,7 +4882,7 @@ next_slot:
ins_nr = 0;
}
btrfs_release_path(path);
-
+next_key:
if (min_key.offset < (u64)-1) {
min_key.offset++;
} else if (min_key.type < max_key.type) {
@@ -4990,8 +5076,12 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
if (!parent || d_really_is_negative(parent) || sb != parent->d_sb)
break;
- if (IS_ROOT(parent))
+ if (IS_ROOT(parent)) {
+ inode = d_inode(parent);
+ if (btrfs_must_commit_transaction(trans, inode))
+ ret = 1;
break;
+ }
parent = dget_parent(parent);
dput(old_parent);
@@ -5302,7 +5392,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
sb = inode->i_sb;
- if (btrfs_test_opt(root, NOTREELOG)) {
+ if (btrfs_test_opt(root->fs_info, NOTREELOG)) {
ret = 1;
goto end_no_trans;
}
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index a9f1b75d0..ab858e31c 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -30,15 +30,18 @@ struct btrfs_log_ctx {
int log_transid;
int io_err;
bool log_new_dentries;
+ struct inode *inode;
struct list_head list;
};
-static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx)
+static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx,
+ struct inode *inode)
{
ctx->log_ret = 0;
ctx->log_transid = 0;
ctx->io_err = 0;
ctx->log_new_dentries = false;
+ ctx->inode = inode;
INIT_LIST_HEAD(&ctx->list);
}
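
Storing the inode in the log context lets btrfs_log_inode() compare a conflicting directory entry's owner against the inode actually being logged (the other_ino handling in the tree-log.c hunk above). A hedged caller-side sketch, presumably mirroring the fsync path; fsync_one() and everything except the btrfs_log_ctx fields are invented:

/* Caller-side sketch: the sync path seeds the log context with the
 * inode being logged before starting the log transaction. */
struct inode;

struct btrfs_log_ctx {
	int log_ret;
	int log_transid;
	int io_err;
	int log_new_dentries;	/* bool in the kernel */
	struct inode *inode;
};

static void init_log_ctx(struct btrfs_log_ctx *ctx, struct inode *inode)
{
	ctx->log_ret = 0;
	ctx->log_transid = 0;
	ctx->io_err = 0;
	ctx->log_new_dentries = 0;
	ctx->inode = inode;	/* new: remember who is being logged */
}

int fsync_one(struct inode *inode)
{
	struct btrfs_log_ctx ctx;

	init_log_ctx(&ctx, inode);
	/* ... log the inode; conflict handling may consult ctx.inode ... */
	return ctx.log_ret;
}

int main(void)
{
	return fsync_one((struct inode *)0);
}
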
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 589f12817..035efce60 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -140,7 +140,6 @@ static int btrfs_relocate_sys_chunks(struct btrfs_root *root);
static void __btrfs_reset_dev_stats(struct btrfs_device *dev);
static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev);
static void btrfs_dev_stat_print_on_load(struct btrfs_device *device);
-static void btrfs_close_one_device(struct btrfs_device *device);
DEFINE_MUTEX(uuid_mutex);
static LIST_HEAD(fs_uuids);
@@ -462,7 +461,7 @@ loop_lock:
sync_pending = 0;
}
- btrfsic_submit_bio(cur->bi_rw, cur);
+ btrfsic_submit_bio(cur);
num_run++;
batch_run++;
@@ -835,10 +834,6 @@ static void __free_device(struct work_struct *work)
struct btrfs_device *device;
device = container_of(work, struct btrfs_device, rcu_work);
-
- if (device->bdev)
- blkdev_put(device->bdev, device->mode);
-
rcu_string_free(device->name);
kfree(device);
}
@@ -853,6 +848,54 @@ static void free_device(struct rcu_head *head)
schedule_work(&device->rcu_work);
}
+static void btrfs_close_bdev(struct btrfs_device *device)
+{
+ if (device->bdev && device->writeable) {
+ sync_blockdev(device->bdev);
+ invalidate_bdev(device->bdev);
+ }
+
+ if (device->bdev)
+ blkdev_put(device->bdev, device->mode);
+}
+
+static void btrfs_close_one_device(struct btrfs_device *device)
+{
+ struct btrfs_fs_devices *fs_devices = device->fs_devices;
+ struct btrfs_device *new_device;
+ struct rcu_string *name;
+
+ if (device->bdev)
+ fs_devices->open_devices--;
+
+ if (device->writeable &&
+ device->devid != BTRFS_DEV_REPLACE_DEVID) {
+ list_del_init(&device->dev_alloc_list);
+ fs_devices->rw_devices--;
+ }
+
+ if (device->missing)
+ fs_devices->missing_devices--;
+
+ btrfs_close_bdev(device);
+
+ new_device = btrfs_alloc_device(NULL, &device->devid,
+ device->uuid);
+ BUG_ON(IS_ERR(new_device)); /* -ENOMEM */
+
+ /* Safe because we are under uuid_mutex */
+ if (device->name) {
+ name = rcu_string_strdup(device->name->str, GFP_NOFS);
+ BUG_ON(!name); /* -ENOMEM */
+ rcu_assign_pointer(new_device->name, name);
+ }
+
+ list_replace_rcu(&device->dev_list, &new_device->dev_list);
+ new_device->fs_devices = device->fs_devices;
+
+ call_rcu(&device->rcu, free_device);
+}
+
static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
{
struct btrfs_device *device, *tmp;
@@ -1893,6 +1936,8 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path, u64 devid)
btrfs_sysfs_rm_device_link(root->fs_info->fs_devices, device);
}
+ btrfs_close_bdev(device);
+
call_rcu(&device->rcu, free_device);
num_devices = btrfs_super_num_devices(root->fs_info->super_copy) - 1;
@@ -1986,6 +2031,9 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info,
/* zero out the old super if it is writable */
btrfs_scratch_superblocks(srcdev->bdev, srcdev->name->str);
}
+
+ btrfs_close_bdev(srcdev);
+
call_rcu(&srcdev->rcu, free_device);
/*
@@ -2041,6 +2089,8 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
* the device_list_mutex lock.
*/
btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str);
+
+ btrfs_close_bdev(tgtdev);
call_rcu(&tgtdev->rcu, free_device);
}
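
btrfs_close_bdev() factors out one ordering rule: flush and invalidate a writeable block device before putting it, and it now runs on every teardown path ahead of the RCU-deferred free of the device struct. A userspace analogy of that flush-then-release ordering using a plain file descriptor (the path and helper are illustrative):

/* Analogy of btrfs_close_bdev()'s ordering: flush dirty data while
 * the handle is still writable, then release it. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static int close_writable(int fd, int writable)
{
	/* Mirror sync_blockdev()/invalidate_bdev() before blkdev_put():
	 * flush first, release second. */
	if (writable && fsync(fd) != 0)
		perror("fsync");
	return close(fd);
}

int main(void)
{
	int fd = open("/tmp/btrfs-close-demo", O_CREAT | O_WRONLY, 0644);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, "x", 1) != 1)
		perror("write");
	return close_writable(fd, 1) != 0;
}
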
@@ -2399,14 +2449,14 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
ret = init_first_rw_device(trans, root, device);
unlock_chunks(root);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto error_trans;
}
}
ret = btrfs_add_device(trans, root, device);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto error_trans;
}
@@ -2415,7 +2465,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
ret = btrfs_finish_sprout(trans, root);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto error_trans;
}
@@ -2801,7 +2851,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
&dev_extent_len);
if (ret) {
mutex_unlock(&fs_devices->device_list_mutex);
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -2820,7 +2870,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
ret = btrfs_update_device(trans, map->stripes[i].dev);
if (ret) {
mutex_unlock(&fs_devices->device_list_mutex);
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
}
@@ -2829,7 +2879,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
ret = btrfs_free_chunk(trans, root, chunk_objectid, chunk_offset);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -2838,14 +2888,14 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
}
ret = btrfs_remove_block_group(trans, extent_root, chunk_offset, em);
if (ret) {
- btrfs_abort_transaction(trans, extent_root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -2902,7 +2952,7 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, u64 chunk_offset)
* chunk tree entries
*/
ret = btrfs_remove_chunk(trans, root, chunk_offset);
- btrfs_end_transaction(trans, root);
+ btrfs_end_transaction(trans, extent_root);
return ret;
}
@@ -3421,7 +3471,7 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
u64 size_to_free;
u64 chunk_type;
struct btrfs_chunk *chunk;
- struct btrfs_path *path;
+ struct btrfs_path *path = NULL;
struct btrfs_key key;
struct btrfs_key found_key;
struct btrfs_trans_handle *trans;
@@ -3455,13 +3505,33 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
ret = btrfs_shrink_device(device, old_size - size_to_free);
if (ret == -ENOSPC)
break;
- BUG_ON(ret);
+ if (ret) {
+ /* btrfs_shrink_device never returns ret > 0 */
+ WARN_ON(ret > 0);
+ goto error;
+ }
trans = btrfs_start_transaction(dev_root, 0);
- BUG_ON(IS_ERR(trans));
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ btrfs_info_in_rcu(fs_info,
+ "resize: unable to start transaction after shrinking device %s (error %d), old size %llu, new size %llu",
+ rcu_str_deref(device->name), ret,
+ old_size, old_size - size_to_free);
+ goto error;
+ }
ret = btrfs_grow_device(trans, device, old_size);
- BUG_ON(ret);
+ if (ret) {
+ btrfs_end_transaction(trans, dev_root);
+ /* btrfs_grow_device never returns ret > 0 */
+ WARN_ON(ret > 0);
+ btrfs_info_in_rcu(fs_info,
+ "resize: unable to grow device after shrinking device %s (error %d), old size %llu, new size %llu",
+ rcu_str_deref(device->name), ret,
+ old_size, old_size - size_to_free);
+ goto error;
+ }
btrfs_end_transaction(trans, dev_root);
}
@@ -3885,7 +3955,7 @@ int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info)
}
spin_unlock(&fs_info->balance_lock);
- if (btrfs_test_opt(fs_info->tree_root, SKIP_BALANCE)) {
+ if (btrfs_test_opt(fs_info, SKIP_BALANCE)) {
btrfs_info(fs_info, "force skipping balance");
return 0;
}
@@ -4240,7 +4310,7 @@ int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info)
BTRFS_UUID_TREE_OBJECTID);
if (IS_ERR(uuid_root)) {
ret = PTR_ERR(uuid_root);
- btrfs_abort_transaction(trans, tree_root, ret);
+ btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans, tree_root);
return ret;
}
@@ -4514,8 +4584,7 @@ static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type)
btrfs_set_fs_incompat(info, RAID56);
}
-#define BTRFS_MAX_DEVS(r) ((BTRFS_LEAF_DATA_SIZE(r) \
- - sizeof(struct btrfs_item) \
+#define BTRFS_MAX_DEVS(r) ((BTRFS_MAX_ITEM_SIZE(r) \
- sizeof(struct btrfs_chunk)) \
/ sizeof(struct btrfs_stripe) + 1)
@@ -5260,7 +5329,7 @@ void btrfs_put_bbio(struct btrfs_bio *bbio)
kfree(bbio);
}
-static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
+static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int op,
u64 logical, u64 *length,
struct btrfs_bio **bbio_ret,
int mirror_num, int need_raid_map)
@@ -5346,7 +5415,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
raid56_full_stripe_start *= full_stripe_len;
}
- if (rw & REQ_DISCARD) {
+ if (op == REQ_OP_DISCARD) {
/* we don't discard raid56 yet */
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
ret = -EOPNOTSUPP;
@@ -5359,7 +5428,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
For other RAID types and for RAID[56] reads, just allow a single
stripe (on a single disk). */
if ((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) &&
- (rw & REQ_WRITE)) {
+ (op == REQ_OP_WRITE)) {
max_len = stripe_len * nr_data_stripes(map) -
(offset - raid56_full_stripe_start);
} else {
@@ -5384,8 +5453,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
btrfs_dev_replace_set_lock_blocking(dev_replace);
if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 &&
- !(rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)) &&
- dev_replace->tgtdev != NULL) {
+ op != REQ_OP_WRITE && op != REQ_OP_DISCARD &&
+ op != REQ_GET_READ_MIRRORS && dev_replace->tgtdev != NULL) {
/*
* in dev-replace case, for repair case (that's the only
* case where the mirror is selected explicitly when
@@ -5472,15 +5541,17 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
(offset + *length);
if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
- if (rw & REQ_DISCARD)
+ if (op == REQ_OP_DISCARD)
num_stripes = min_t(u64, map->num_stripes,
stripe_nr_end - stripe_nr_orig);
stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
&stripe_index);
- if (!(rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)))
+ if (op != REQ_OP_WRITE && op != REQ_OP_DISCARD &&
+ op != REQ_GET_READ_MIRRORS)
mirror_num = 1;
} else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
- if (rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS))
+ if (op == REQ_OP_WRITE || op == REQ_OP_DISCARD ||
+ op == REQ_GET_READ_MIRRORS)
num_stripes = map->num_stripes;
else if (mirror_num)
stripe_index = mirror_num - 1;
@@ -5493,7 +5564,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
}
} else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
- if (rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)) {
+ if (op == REQ_OP_WRITE || op == REQ_OP_DISCARD ||
+ op == REQ_GET_READ_MIRRORS) {
num_stripes = map->num_stripes;
} else if (mirror_num) {
stripe_index = mirror_num - 1;
@@ -5507,9 +5579,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index);
stripe_index *= map->sub_stripes;
- if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS))
+ if (op == REQ_OP_WRITE || op == REQ_GET_READ_MIRRORS)
num_stripes = map->sub_stripes;
- else if (rw & REQ_DISCARD)
+ else if (op == REQ_OP_DISCARD)
num_stripes = min_t(u64, map->sub_stripes *
(stripe_nr_end - stripe_nr_orig),
map->num_stripes);
@@ -5527,7 +5599,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
} else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
if (need_raid_map &&
- ((rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) ||
+ (op == REQ_OP_WRITE || op == REQ_GET_READ_MIRRORS ||
mirror_num > 1)) {
/* push stripe_nr back to the start of the full stripe */
stripe_nr = div_u64(raid56_full_stripe_start,
@@ -5555,8 +5627,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
/* We distribute the parity blocks across stripes */
div_u64_rem(stripe_nr + stripe_index, map->num_stripes,
&stripe_index);
- if (!(rw & (REQ_WRITE | REQ_DISCARD |
- REQ_GET_READ_MIRRORS)) && mirror_num <= 1)
+ if ((op != REQ_OP_WRITE && op != REQ_OP_DISCARD &&
+ op != REQ_GET_READ_MIRRORS) && mirror_num <= 1)
mirror_num = 1;
}
} else {
@@ -5579,9 +5651,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
num_alloc_stripes = num_stripes;
if (dev_replace_is_ongoing) {
- if (rw & (REQ_WRITE | REQ_DISCARD))
+ if (op == REQ_OP_WRITE || op == REQ_OP_DISCARD)
num_alloc_stripes <<= 1;
- if (rw & REQ_GET_READ_MIRRORS)
+ if (op == REQ_GET_READ_MIRRORS)
num_alloc_stripes++;
tgtdev_indexes = num_stripes;
}
@@ -5596,7 +5668,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
/* build raid_map */
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK &&
- need_raid_map && ((rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) ||
+ need_raid_map &&
+ ((op == REQ_OP_WRITE || op == REQ_GET_READ_MIRRORS) ||
mirror_num > 1)) {
u64 tmp;
unsigned rot;
@@ -5621,7 +5694,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
RAID6_Q_STRIPE;
}
- if (rw & REQ_DISCARD) {
+ if (op == REQ_OP_DISCARD) {
u32 factor = 0;
u32 sub_stripes = 0;
u64 stripes_per_dev = 0;
@@ -5701,14 +5774,15 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
}
}
- if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS))
+ if (op == REQ_OP_WRITE || op == REQ_GET_READ_MIRRORS)
max_errors = btrfs_chunk_max_errors(map);
if (bbio->raid_map)
sort_parity_stripes(bbio, num_stripes);
tgtdev_indexes = 0;
- if (dev_replace_is_ongoing && (rw & (REQ_WRITE | REQ_DISCARD)) &&
+ if (dev_replace_is_ongoing &&
+ (op == REQ_OP_WRITE || op == REQ_OP_DISCARD) &&
dev_replace->tgtdev != NULL) {
int index_where_to_add;
u64 srcdev_devid = dev_replace->srcdev->devid;
@@ -5743,7 +5817,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
}
}
num_stripes = index_where_to_add;
- } else if (dev_replace_is_ongoing && (rw & REQ_GET_READ_MIRRORS) &&
+ } else if (dev_replace_is_ongoing && (op == REQ_GET_READ_MIRRORS) &&
dev_replace->tgtdev != NULL) {
u64 srcdev_devid = dev_replace->srcdev->devid;
int index_srcdev = 0;
@@ -5815,21 +5889,21 @@ out:
return ret;
}
-int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
+int btrfs_map_block(struct btrfs_fs_info *fs_info, int op,
u64 logical, u64 *length,
struct btrfs_bio **bbio_ret, int mirror_num)
{
- return __btrfs_map_block(fs_info, rw, logical, length, bbio_ret,
+ return __btrfs_map_block(fs_info, op, logical, length, bbio_ret,
mirror_num, 0);
}
/* For Scrub/replace */
-int btrfs_map_sblock(struct btrfs_fs_info *fs_info, int rw,
+int btrfs_map_sblock(struct btrfs_fs_info *fs_info, int op,
u64 logical, u64 *length,
struct btrfs_bio **bbio_ret, int mirror_num,
int need_raid_map)
{
- return __btrfs_map_block(fs_info, rw, logical, length, bbio_ret,
+ return __btrfs_map_block(fs_info, op, logical, length, bbio_ret,
mirror_num, need_raid_map);
}
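
A caller-side sketch of the renamed parameter: lookups now pass a REQ_OP_* value (or REQ_GET_READ_MIRRORS) rather than an rw flag word. This is an illustration with error handling trimmed, not code from the patch:

    struct btrfs_bio *bbio = NULL;
    u64 map_length = length;
    int ret;

    ret = btrfs_map_block(fs_info, REQ_OP_READ, logical,
                          &map_length, &bbio, mirror_num);
    if (ret)
            return ret;
    /* ... issue the read against bbio->stripes[] ... */
    btrfs_put_bbio(bbio);
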
@@ -5943,13 +6017,13 @@ static void btrfs_end_bio(struct bio *bio)
BUG_ON(stripe_index >= bbio->num_stripes);
dev = bbio->stripes[stripe_index].dev;
if (dev->bdev) {
- if (bio->bi_rw & WRITE)
+ if (bio_op(bio) == REQ_OP_WRITE)
btrfs_dev_stat_inc(dev,
BTRFS_DEV_STAT_WRITE_ERRS);
else
btrfs_dev_stat_inc(dev,
BTRFS_DEV_STAT_READ_ERRS);
- if ((bio->bi_rw & WRITE_FLUSH) == WRITE_FLUSH)
+ if ((bio->bi_opf & WRITE_FLUSH) == WRITE_FLUSH)
btrfs_dev_stat_inc(dev,
BTRFS_DEV_STAT_FLUSH_ERRS);
btrfs_dev_stat_print_on_error(dev);
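
The two accessors used here are easy to conflate: bio_op() extracts the REQ_OP_* opcode packed into bi_opf, while flag bits such as REQ_SYNC (or the WRITE_FLUSH combination tested above) are still checked by masking bi_opf directly. A small sketch of the distinction:

    static bool bio_is_sync_write(struct bio *bio)
    {
            /* opcode: compared with == */
            bool write = bio_op(bio) == REQ_OP_WRITE;
            /* flag: masked out of bi_opf, where it rides with the opcode */
            bool sync = bio->bi_opf & REQ_SYNC;

            return write && sync;
    }
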
@@ -5997,7 +6071,7 @@ static void btrfs_end_bio(struct bio *bio)
*/
static noinline void btrfs_schedule_bio(struct btrfs_root *root,
struct btrfs_device *device,
- int rw, struct bio *bio)
+ struct bio *bio)
{
int should_queue = 1;
struct btrfs_pending_bios *pending_bios;
@@ -6008,9 +6082,9 @@ static noinline void btrfs_schedule_bio(struct btrfs_root *root,
}
/* don't bother with additional async steps for reads, right now */
- if (!(rw & REQ_WRITE)) {
+ if (bio_op(bio) == REQ_OP_READ) {
bio_get(bio);
- btrfsic_submit_bio(rw, bio);
+ btrfsic_submit_bio(bio);
bio_put(bio);
return;
}
@@ -6024,10 +6098,9 @@ static noinline void btrfs_schedule_bio(struct btrfs_root *root,
atomic_inc(&root->fs_info->nr_async_bios);
WARN_ON(bio->bi_next);
bio->bi_next = NULL;
- bio->bi_rw |= rw;
spin_lock(&device->io_lock);
- if (bio->bi_rw & REQ_SYNC)
+ if (bio->bi_opf & REQ_SYNC)
pending_bios = &device->pending_sync_bios;
else
pending_bios = &device->pending_bios;
@@ -6050,7 +6123,7 @@ static noinline void btrfs_schedule_bio(struct btrfs_root *root,
static void submit_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio,
struct bio *bio, u64 physical, int dev_nr,
- int rw, int async)
+ int async)
{
struct btrfs_device *dev = bbio->stripes[dev_nr].dev;
@@ -6064,8 +6137,8 @@ static void submit_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio,
rcu_read_lock();
name = rcu_dereference(dev->name);
- pr_debug("btrfs_map_bio: rw %d, sector=%llu, dev=%lu "
- "(%s id %llu), size=%u\n", rw,
+ pr_debug("btrfs_map_bio: rw %d 0x%x, sector=%llu, dev=%lu "
+ "(%s id %llu), size=%u\n", bio_op(bio), bio->bi_opf,
(u64)bio->bi_iter.bi_sector, (u_long)dev->bdev->bd_dev,
name->str, dev->devid, bio->bi_iter.bi_size);
rcu_read_unlock();
@@ -6076,9 +6149,9 @@ static void submit_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio,
btrfs_bio_counter_inc_noblocked(root->fs_info);
if (async)
- btrfs_schedule_bio(root, dev, rw, bio);
+ btrfs_schedule_bio(root, dev, bio);
else
- btrfsic_submit_bio(rw, bio);
+ btrfsic_submit_bio(bio);
}
static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)
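
Since the submission helpers now take only the bio, a writer prepares the opcode and flags on the bio itself before handing it off. A minimal sketch, assuming the 4.8-era bio_set_op_attrs() helper:

    bio->bi_iter.bi_sector = physical >> 9;
    bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC);
    btrfsic_submit_bio(bio);        /* no separate rw argument */
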
@@ -6095,7 +6168,7 @@ static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)
}
}
-int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
+int btrfs_map_bio(struct btrfs_root *root, struct bio *bio,
int mirror_num, int async_submit)
{
struct btrfs_device *dev;
@@ -6112,8 +6185,8 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
map_length = length;
btrfs_bio_counter_inc_blocked(root->fs_info);
- ret = __btrfs_map_block(root->fs_info, rw, logical, &map_length, &bbio,
- mirror_num, 1);
+ ret = __btrfs_map_block(root->fs_info, bio_op(bio), logical,
+ &map_length, &bbio, mirror_num, 1);
if (ret) {
btrfs_bio_counter_dec(root->fs_info);
return ret;
@@ -6127,10 +6200,10 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
atomic_set(&bbio->stripes_pending, bbio->num_stripes);
if ((bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) &&
- ((rw & WRITE) || (mirror_num > 1))) {
+ ((bio_op(bio) == REQ_OP_WRITE) || (mirror_num > 1))) {
/* In this case, map_length has been set to the length of
a single stripe; not the whole write */
- if (rw & WRITE) {
+ if (bio_op(bio) == REQ_OP_WRITE) {
ret = raid56_parity_write(root, bio, bbio, map_length);
} else {
ret = raid56_parity_recover(root, bio, bbio, map_length,
@@ -6149,7 +6222,8 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
for (dev_nr = 0; dev_nr < total_devs; dev_nr++) {
dev = bbio->stripes[dev_nr].dev;
- if (!dev || !dev->bdev || (rw & WRITE && !dev->writeable)) {
+ if (!dev || !dev->bdev ||
+ (bio_op(bio) == REQ_OP_WRITE && !dev->writeable)) {
bbio_error(bbio, first_bio, logical);
continue;
}
@@ -6161,7 +6235,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
bio = first_bio;
submit_stripe_bio(root, bbio, bio,
- bbio->stripes[dev_nr].physical, dev_nr, rw,
+ bbio->stripes[dev_nr].physical, dev_nr,
async_submit);
}
btrfs_bio_counter_dec(root->fs_info);
@@ -6396,7 +6470,8 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
BTRFS_UUID_SIZE);
map->stripes[i].dev = btrfs_find_device(root->fs_info, devid,
uuid, NULL);
- if (!map->stripes[i].dev && !btrfs_test_opt(root, DEGRADED)) {
+ if (!map->stripes[i].dev &&
+ !btrfs_test_opt(root->fs_info, DEGRADED)) {
free_extent_map(em);
return -EIO;
}
@@ -6464,7 +6539,7 @@ static struct btrfs_fs_devices *open_seed_devices(struct btrfs_root *root,
fs_devices = find_fsid(fsid);
if (!fs_devices) {
- if (!btrfs_test_opt(root, DEGRADED))
+ if (!btrfs_test_opt(root->fs_info, DEGRADED))
return ERR_PTR(-ENOENT);
fs_devices = alloc_fs_devices(fsid);
@@ -6526,7 +6601,7 @@ static int read_one_dev(struct btrfs_root *root,
device = btrfs_find_device(root->fs_info, devid, dev_uuid, fs_uuid);
if (!device) {
- if (!btrfs_test_opt(root, DEGRADED))
+ if (!btrfs_test_opt(root->fs_info, DEGRADED))
return -EIO;
device = add_missing_dev(root, fs_devices, devid, dev_uuid);
@@ -6535,7 +6610,7 @@ static int read_one_dev(struct btrfs_root *root,
btrfs_warn(root->fs_info, "devid %llu uuid %pU missing",
devid, dev_uuid);
} else {
- if (!device->bdev && !btrfs_test_opt(root, DEGRADED))
+ if (!device->bdev && !btrfs_test_opt(root->fs_info, DEGRADED))
return -EIO;
		if (!device->bdev && !device->missing) {
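
The btrfs_test_opt() churn in these hunks reflects the macro now being keyed off fs_info instead of a root; it is presumably defined along these lines in ctree.h (a sketch, not quoted from the patch):

    #define btrfs_test_opt(fs_info, opt) \
            ((fs_info)->mount_opt & BTRFS_MOUNT_##opt)

    /* call sites therefore dereference the root explicitly: */
    if (!btrfs_test_opt(root->fs_info, DEGRADED))
            return -EIO;
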
@@ -7138,38 +7213,3 @@ void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info)
fs_devices = fs_devices->seed;
}
}
-
-static void btrfs_close_one_device(struct btrfs_device *device)
-{
- struct btrfs_fs_devices *fs_devices = device->fs_devices;
- struct btrfs_device *new_device;
- struct rcu_string *name;
-
- if (device->bdev)
- fs_devices->open_devices--;
-
- if (device->writeable &&
- device->devid != BTRFS_DEV_REPLACE_DEVID) {
- list_del_init(&device->dev_alloc_list);
- fs_devices->rw_devices--;
- }
-
- if (device->missing)
- fs_devices->missing_devices--;
-
- new_device = btrfs_alloc_device(NULL, &device->devid,
- device->uuid);
- BUG_ON(IS_ERR(new_device)); /* -ENOMEM */
-
- /* Safe because we are under uuid_mutex */
- if (device->name) {
- name = rcu_string_strdup(device->name->str, GFP_NOFS);
- BUG_ON(!name); /* -ENOMEM */
- rcu_assign_pointer(new_device->name, name);
- }
-
- list_replace_rcu(&device->dev_list, &new_device->dev_list);
- new_device->fs_devices = device->fs_devices;
-
- call_rcu(&device->rcu, free_device);
-}
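
The deleted helper is a textbook instance of the RCU replace-then-free pattern: swap the list entry in place so lockless readers keep seeing a valid device, then free the old one only after a grace period. A generic sketch of the pattern with illustrative types:

    struct item {
            struct list_head list;
            struct rcu_head rcu;
    };

    static void free_item_rcu(struct rcu_head *head)
    {
            kfree(container_of(head, struct item, rcu));
    }

    static void replace_item(struct item *old, struct item *new)
    {
            /* readers traversing the list see either old or new */
            list_replace_rcu(&old->list, &new->list);
            /* old is freed only once all current readers are done */
            call_rcu(&old->rcu, free_item_rcu);
    }
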
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 0ac90f8d8..6613e6335 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -375,10 +375,10 @@ int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
u64 end, u64 *length);
void btrfs_get_bbio(struct btrfs_bio *bbio);
void btrfs_put_bbio(struct btrfs_bio *bbio);
-int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
+int btrfs_map_block(struct btrfs_fs_info *fs_info, int op,
u64 logical, u64 *length,
struct btrfs_bio **bbio_ret, int mirror_num);
-int btrfs_map_sblock(struct btrfs_fs_info *fs_info, int rw,
+int btrfs_map_sblock(struct btrfs_fs_info *fs_info, int op,
u64 logical, u64 *length,
struct btrfs_bio **bbio_ret, int mirror_num,
int need_raid_map);
@@ -391,7 +391,7 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
struct btrfs_root *extent_root, u64 type);
void btrfs_mapping_init(struct btrfs_mapping_tree *tree);
void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree);
-int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
+int btrfs_map_bio(struct btrfs_root *root, struct bio *bio,
int mirror_num, int async_submit);
int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
fmode_t flags, void *holder);
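
Net effect on the exported interface: the request type now travels inside the bio, so a hypothetical write submission under the new prototypes reads:

    bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
    ret = btrfs_map_bio(root, bio, 0 /* mirror_num */,
                        1 /* async_submit */);
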