Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--  fs/btrfs/extent-tree.c | 246
1 file changed, 187 insertions(+), 59 deletions(-)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 78f1b57d0..82b912a29 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -231,9 +231,9 @@ static int add_excluded_extent(struct btrfs_root *root,
{
u64 end = start + num_bytes - 1;
set_extent_bits(&root->fs_info->freed_extents[0],
- start, end, EXTENT_UPTODATE, GFP_NOFS);
+ start, end, EXTENT_UPTODATE);
set_extent_bits(&root->fs_info->freed_extents[1],
- start, end, EXTENT_UPTODATE, GFP_NOFS);
+ start, end, EXTENT_UPTODATE);
return 0;
}
@@ -246,9 +246,9 @@ static void free_excluded_extents(struct btrfs_root *root,
end = start + cache->key.offset - 1;
clear_extent_bits(&root->fs_info->freed_extents[0],
- start, end, EXTENT_UPTODATE, GFP_NOFS);
+ start, end, EXTENT_UPTODATE);
clear_extent_bits(&root->fs_info->freed_extents[1],
- start, end, EXTENT_UPTODATE, GFP_NOFS);
+ start, end, EXTENT_UPTODATE);
}
static int exclude_super_stripes(struct btrfs_root *root,
@@ -980,7 +980,7 @@ out_free:
* event that tree block loses its owner tree's reference and do the
* back refs conversion.
*
- * When a tree block is COW'd through a tree, there are four cases:
+ * When a tree block is COWed through a tree, there are four cases:
*
* The reference count of the block is one and the tree is the block's
* owner tree. Nothing to do in this case.
@@ -2042,6 +2042,11 @@ int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
struct btrfs_bio *bbio = NULL;
+ /*
+ * Avoid races with device replace and make sure our bbio has devices
+ * associated to its stripes that don't go away while we are discarding.
+ */
+ btrfs_bio_counter_inc_blocked(root->fs_info);
/* Tell the block device(s) that the sectors can be discarded */
ret = btrfs_map_block(root->fs_info, REQ_DISCARD,
bytenr, &num_bytes, &bbio, 0);
@@ -2074,6 +2079,7 @@ int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
}
btrfs_put_bbio(bbio);
}
+ btrfs_bio_counter_dec(root->fs_info);
if (actual_bytes)
*actual_bytes = discarded_bytes;
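These two hunks bracket the whole discard path with the fs_info bio counter: it is taken before btrfs_map_block() and only dropped after the last stripe has been processed, so a concurrent device replace cannot free the devices that bbio's stripes point at in between. A minimal sketch of the resulting shape of the function, with the per-stripe discard loop and error handling elided:

	btrfs_bio_counter_inc_blocked(root->fs_info);
	/* map the logical range onto physical stripes */
	ret = btrfs_map_block(root->fs_info, REQ_DISCARD,
			      bytenr, &num_bytes, &bbio, 0);
	if (!ret) {
		/* ... issue one discard per stripe, summing discarded_bytes ... */
		btrfs_put_bbio(bbio);
	}
	/* device replace may make progress again from here on */
	btrfs_bio_counter_dec(root->fs_info);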
@@ -2595,7 +2601,7 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
}
/*
- * Need to drop our head ref lock and re-aqcuire the
+ * Need to drop our head ref lock and re-acquire the
* delayed ref lock and then re-check to make sure
* nobody got added.
*/
@@ -2747,7 +2753,7 @@ static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads)
/*
* We don't ever fill up leaves all the way so multiply by 2 just to be
- * closer to what we're really going to want to ouse.
+ * closer to what we're really going to want to use.
*/
return div_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(root));
}
@@ -2829,6 +2835,7 @@ int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
struct async_delayed_refs {
struct btrfs_root *root;
+ u64 transid;
int count;
int error;
int sync;
@@ -2844,6 +2851,10 @@ static void delayed_ref_async_start(struct btrfs_work *work)
async = container_of(work, struct async_delayed_refs, work);
+ /* if the commit is already started, we don't need to wait here */
+ if (btrfs_transaction_blocked(async->root->fs_info))
+ goto done;
+
trans = btrfs_join_transaction(async->root);
if (IS_ERR(trans)) {
async->error = PTR_ERR(trans);
@@ -2851,14 +2862,19 @@ static void delayed_ref_async_start(struct btrfs_work *work)
}
/*
- * trans->sync means that when we call end_transaciton, we won't
+ * trans->sync means that when we call end_transaction, we won't
* wait on delayed refs
*/
trans->sync = true;
+
+ /* Don't bother flushing if we got into a different transaction */
+ if (trans->transid > async->transid)
+ goto end;
+
ret = btrfs_run_delayed_refs(trans, async->root, async->count);
if (ret)
async->error = ret;
-
+end:
ret = btrfs_end_transaction(trans, async->root);
if (ret && !async->error)
async->error = ret;
@@ -2870,7 +2886,7 @@ done:
}
int btrfs_async_run_delayed_refs(struct btrfs_root *root,
- unsigned long count, int wait)
+ unsigned long count, u64 transid, int wait)
{
struct async_delayed_refs *async;
int ret;
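With the new transid parameter, a caller passes the id of the transaction whose delayed refs it wants flushed; the worker above compares that against the transaction it actually joined and bails out if the commit has already moved on. A hypothetical caller, assuming it holds a transaction handle trans:

	/* kick async processing of up to 'count' delayed refs for this transaction */
	btrfs_async_run_delayed_refs(root, count, trans->transid, 0 /* don't wait */);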
@@ -2882,6 +2898,7 @@ int btrfs_async_run_delayed_refs(struct btrfs_root *root,
async->root = root->fs_info->tree_root;
async->count = count;
async->error = 0;
+ async->transid = transid;
if (wait)
async->sync = 1;
else
@@ -3824,6 +3841,59 @@ int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
return readonly;
}
+bool btrfs_inc_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr)
+{
+ struct btrfs_block_group_cache *bg;
+ bool ret = true;
+
+ bg = btrfs_lookup_block_group(fs_info, bytenr);
+ if (!bg)
+ return false;
+
+ spin_lock(&bg->lock);
+ if (bg->ro)
+ ret = false;
+ else
+ atomic_inc(&bg->nocow_writers);
+ spin_unlock(&bg->lock);
+
+ /* no put on block group, done by btrfs_dec_nocow_writers */
+ if (!ret)
+ btrfs_put_block_group(bg);
+
+ return ret;
+
+}
+
+void btrfs_dec_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr)
+{
+ struct btrfs_block_group_cache *bg;
+
+ bg = btrfs_lookup_block_group(fs_info, bytenr);
+ ASSERT(bg);
+ if (atomic_dec_and_test(&bg->nocow_writers))
+ wake_up_atomic_t(&bg->nocow_writers);
+ /*
+ * Once for our lookup and once for the lookup done by a previous call
+ * to btrfs_inc_nocow_writers()
+ */
+ btrfs_put_block_group(bg);
+ btrfs_put_block_group(bg);
+}
+
+static int btrfs_wait_nocow_writers_atomic_t(atomic_t *a)
+{
+ schedule();
+ return 0;
+}
+
+void btrfs_wait_nocow_writers(struct btrfs_block_group_cache *bg)
+{
+ wait_on_atomic_t(&bg->nocow_writers,
+ btrfs_wait_nocow_writers_atomic_t,
+ TASK_UNINTERRUPTIBLE);
+}
+
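The counter added above lets a writer pin a block group against being flipped read-only for as long as it relies on writing in place (nocow) into that group, while btrfs_wait_nocow_writers() gives the side that sets the group read-only a way to drain such writers. A hypothetical write-side caller, assuming disk_bytenr lies inside the block group being written to:

	if (btrfs_inc_nocow_writers(fs_info, disk_bytenr)) {
		/* ... set up the ordered extent / submit the in-place write ... */
		btrfs_dec_nocow_writers(fs_info, disk_bytenr);
	} else {
		/*
		 * The block group is read-only (e.g. being relocated);
		 * fall back to an ordinary COW write instead.
		 */
	}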
static const char *alloc_name(u64 flags)
{
switch (flags) {
@@ -4141,7 +4211,7 @@ commit_trans:
if (need_commit > 0) {
btrfs_start_delalloc_roots(fs_info, 0, -1);
- btrfs_wait_ordered_roots(fs_info, -1);
+ btrfs_wait_ordered_roots(fs_info, -1, 0, (u64)-1);
}
trans = btrfs_join_transaction(root);
@@ -4243,7 +4313,7 @@ void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start,
* Called if we need to clear a data reservation for this inode
* Normally in a error case.
*
- * This one will handle the per-indoe data rsv map for accurate reserved
+ * This one will handle the per-inode data rsv map for accurate reserved
* space framework.
*/
void btrfs_free_reserved_data_space(struct inode *inode, u64 start, u64 len)
@@ -4583,7 +4653,8 @@ static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root,
*/
btrfs_start_delalloc_roots(root->fs_info, 0, nr_items);
if (!current->journal_info)
- btrfs_wait_ordered_roots(root->fs_info, nr_items);
+ btrfs_wait_ordered_roots(root->fs_info, nr_items,
+ 0, (u64)-1);
}
}
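Several call sites in this patch pass two extra arguments to btrfs_wait_ordered_roots(); judging from the values used (0 and (u64)-1) they look like a byte range, with the full range requested everywhere here. A hedged one-line example of the widened call, parameter names assumed:

	/* wait for ordered extents in every root, across the whole range */
	btrfs_wait_ordered_roots(fs_info, nr_items, 0 /* range start */, (u64)-1 /* range length */);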
@@ -4620,7 +4691,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
/* Calc the number of the pages we need flush for space reservation */
items = calc_reclaim_items_nr(root, to_reclaim);
- to_reclaim = items * EXTENT_SIZE_PER_ITEM;
+ to_reclaim = (u64)items * EXTENT_SIZE_PER_ITEM;
trans = (struct btrfs_trans_handle *)current->journal_info;
block_rsv = &root->fs_info->delalloc_block_rsv;
@@ -4632,7 +4703,8 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
if (trans)
return;
if (wait_ordered)
- btrfs_wait_ordered_roots(root->fs_info, items);
+ btrfs_wait_ordered_roots(root->fs_info, items,
+ 0, (u64)-1);
return;
}
@@ -4671,7 +4743,8 @@ skip_async:
loops++;
if (wait_ordered && !trans) {
- btrfs_wait_ordered_roots(root->fs_info, items);
+ btrfs_wait_ordered_roots(root->fs_info, items,
+ 0, (u64)-1);
} else {
time_left = schedule_timeout_killable(1);
if (time_left)
@@ -4911,7 +4984,7 @@ void btrfs_init_async_reclaim_work(struct work_struct *work)
* @orig_bytes - the number of bytes we want
* @flush - whether or not we can flush to make our reservation
*
- * This will reserve orgi_bytes number of bytes from the space info associated
+ * This will reserve orig_bytes number of bytes from the space info associated
* with the block_rsv. If there is not enough space it will make an attempt to
* flush out space to make room. It will do this by flushing delalloc if
* possible or committing the transaction. If flush is 0 then no attempts to
@@ -5516,7 +5589,7 @@ void btrfs_orphan_release_metadata(struct inode *inode)
* common file/directory operations, they change two fs/file trees
* and root tree, the number of items that the qgroup reserves is
* different with the free space reservation. So we can not use
- * the space reseravtion mechanism in start_transaction().
+ * the space reservation mechanism in start_transaction().
*/
int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
struct btrfs_block_rsv *rsv,
@@ -5565,7 +5638,7 @@ void btrfs_subvolume_release_metadata(struct btrfs_root *root,
/**
* drop_outstanding_extent - drop an outstanding extent
* @inode: the inode we're dropping the extent for
- * @num_bytes: the number of bytes we're relaseing.
+ * @num_bytes: the number of bytes we're releasing.
*
* This is called when we are freeing up an outstanding extent, either called
* after an error or after an extent is written. This will return the number of
@@ -5591,7 +5664,7 @@ static unsigned drop_outstanding_extent(struct inode *inode, u64 num_bytes)
drop_inode_space = 1;
/*
- * If we have more or the same amount of outsanding extents than we have
+ * If we have more or the same amount of outstanding extents than we have
* reserved then we need to leave the reserved extents count alone.
*/
if (BTRFS_I(inode)->outstanding_extents >=
@@ -5605,8 +5678,8 @@ static unsigned drop_outstanding_extent(struct inode *inode, u64 num_bytes)
}
/**
- * calc_csum_metadata_size - return the amount of metada space that must be
- * reserved/free'd for the given bytes.
+ * calc_csum_metadata_size - return the amount of metadata space that must be
+ * reserved/freed for the given bytes.
* @inode: the inode we're manipulating
* @num_bytes: the number of bytes in question
* @reserve: 1 if we are reserving space, 0 if we are freeing space
@@ -5758,7 +5831,7 @@ out_fail:
/*
* This is tricky, but first we need to figure out how much we
- * free'd from any free-ers that occurred during this
+ * freed from any free-ers that occurred during this
* reservation, so we reset ->csum_bytes to the csum_bytes
* before we dropped our lock, and then call the free for the
* number of bytes that were freed while we were trying our
@@ -5780,7 +5853,7 @@ out_fail:
/*
* Now reset ->csum_bytes to what it should be. If bytes is
- * more than to_free then we would have free'd more space had we
+ * more than to_free then we would have freed more space had we
* not had an artificially high ->csum_bytes, so we need to free
* the remainder. If bytes is the same or less then we don't
* need to do anything, the other free-ers did the correct
@@ -6172,6 +6245,57 @@ int btrfs_exclude_logged_extents(struct btrfs_root *log,
return 0;
}
+static void
+btrfs_inc_block_group_reservations(struct btrfs_block_group_cache *bg)
+{
+ atomic_inc(&bg->reservations);
+}
+
+void btrfs_dec_block_group_reservations(struct btrfs_fs_info *fs_info,
+ const u64 start)
+{
+ struct btrfs_block_group_cache *bg;
+
+ bg = btrfs_lookup_block_group(fs_info, start);
+ ASSERT(bg);
+ if (atomic_dec_and_test(&bg->reservations))
+ wake_up_atomic_t(&bg->reservations);
+ btrfs_put_block_group(bg);
+}
+
+static int btrfs_wait_bg_reservations_atomic_t(atomic_t *a)
+{
+ schedule();
+ return 0;
+}
+
+void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg)
+{
+ struct btrfs_space_info *space_info = bg->space_info;
+
+ ASSERT(bg->ro);
+
+ if (!(bg->flags & BTRFS_BLOCK_GROUP_DATA))
+ return;
+
+ /*
+ * Our block group is read only but before we set it to read only,
+ * some task might have had allocated an extent from it already, but it
+ * has not yet created a respective ordered extent (and added it to a
+ * root's list of ordered extents).
+ * Therefore wait for any task currently allocating extents, since the
+ * block group's reservations counter is incremented while a read lock
+ * on the groups' semaphore is held and decremented after releasing
+ * the read access on that semaphore and creating the ordered extent.
+ */
+ down_write(&space_info->groups_sem);
+ up_write(&space_info->groups_sem);
+
+ wait_on_atomic_t(&bg->reservations,
+ btrfs_wait_bg_reservations_atomic_t,
+ TASK_UNINTERRUPTIBLE);
+}
+
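The reservations counter follows the same wait_on_atomic_t pattern as nocow_writers above: find_free_extent() raises it whenever it hands out an extent (see the hunk at the checks: label further down), the consumer of that allocation drops it again via btrfs_dec_block_group_reservations() (metadata allocations do so right after find_free_extent() returns, in a hunk further down), and the read-only side waits on it before relying on the group being quiescent. A rough sketch of that read-only side, with the surrounding caller left hypothetical:

	/* hypothetical caller that just flipped bg->ro and wants it quiescent */
	btrfs_wait_block_group_reservations(bg);	/* drain allocations that raced with the flip */
	btrfs_wait_nocow_writers(bg);			/* drain in-flight nocow writes into the group */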
/**
* btrfs_update_reserved_bytes - update the block_group and space info counters
* @cache: The cache we are manipulating
@@ -6408,7 +6532,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
ret = btrfs_discard_extent(root, start,
end + 1 - start, NULL);
- clear_extent_dirty(unpin, start, end, GFP_NOFS);
+ clear_extent_dirty(unpin, start, end);
unpin_extent_range(root, start, end, true);
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
cond_resched();
@@ -7025,36 +7149,35 @@ btrfs_lock_cluster(struct btrfs_block_group_cache *block_group,
int delalloc)
{
struct btrfs_block_group_cache *used_bg = NULL;
- bool locked = false;
-again:
+
spin_lock(&cluster->refill_lock);
- if (locked) {
- if (used_bg == cluster->block_group)
+ while (1) {
+ used_bg = cluster->block_group;
+ if (!used_bg)
+ return NULL;
+
+ if (used_bg == block_group)
return used_bg;
- up_read(&used_bg->data_rwsem);
- btrfs_put_block_group(used_bg);
- }
+ btrfs_get_block_group(used_bg);
- used_bg = cluster->block_group;
- if (!used_bg)
- return NULL;
+ if (!delalloc)
+ return used_bg;
- if (used_bg == block_group)
- return used_bg;
+ if (down_read_trylock(&used_bg->data_rwsem))
+ return used_bg;
- btrfs_get_block_group(used_bg);
+ spin_unlock(&cluster->refill_lock);
- if (!delalloc)
- return used_bg;
+ down_read(&used_bg->data_rwsem);
- if (down_read_trylock(&used_bg->data_rwsem))
- return used_bg;
+ spin_lock(&cluster->refill_lock);
+ if (used_bg == cluster->block_group)
+ return used_bg;
- spin_unlock(&cluster->refill_lock);
- down_read(&used_bg->data_rwsem);
- locked = true;
- goto again;
+ up_read(&used_bg->data_rwsem);
+ btrfs_put_block_group(used_bg);
+ }
}
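The rewrite replaces the locked/again flag dance with a plain loop, but the locking idea is unchanged: try to take the block group's data_rwsem without sleeping while the cluster's refill_lock is held; if that fails, drop the spinlock, take the rwsem blocking, then retake the spinlock and re-validate that the cluster still points at the same block group before trusting it. The same pattern, stripped of the refcounting and the delalloc-only locking for clarity:

	spin_lock(&cluster->refill_lock);
	while (1) {
		bg = cluster->block_group;
		if (!bg)
			break;				/* cluster is empty */
		if (down_read_trylock(&bg->data_rwsem))
			break;				/* got it without sleeping */
		spin_unlock(&cluster->refill_lock);
		down_read(&bg->data_rwsem);		/* may sleep */
		spin_lock(&cluster->refill_lock);
		if (bg == cluster->block_group)
			break;				/* nothing changed, keep the rwsem */
		up_read(&bg->data_rwsem);		/* raced with a cluster change, retry */
	}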
static inline void
@@ -7431,6 +7554,7 @@ checks:
btrfs_add_free_space(block_group, offset, num_bytes);
goto loop;
}
+ btrfs_inc_block_group_reservations(block_group);
/* we are all good, lets return */
ins->objectid = search_start;
@@ -7471,7 +7595,7 @@ loop:
if (loop == LOOP_CACHING_NOWAIT) {
/*
* We want to skip the LOOP_CACHING_WAIT step if we
- * don't have any unached bgs and we've alrelady done a
+ * don't have any uncached bgs and we've already done a
* full search through.
*/
if (orig_have_caching_bg || !full_search)
@@ -7612,8 +7736,10 @@ again:
WARN_ON(num_bytes < root->sectorsize);
ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins,
flags, delalloc);
-
- if (ret == -ENOSPC) {
+ if (!ret && !is_data) {
+ btrfs_dec_block_group_reservations(root->fs_info,
+ ins->objectid);
+ } else if (ret == -ENOSPC) {
if (!final_tried && ins->offset) {
num_bytes = min(num_bytes >> 1, ins->offset);
num_bytes = round_down(num_bytes, root->sectorsize);
@@ -7873,7 +7999,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
/*
* Mixed block groups will exclude before processing the log so we only
- * need to do the exlude dance if this fs isn't mixed.
+ * need to do the exclude dance if this fs isn't mixed.
*/
if (!btrfs_fs_incompat(root->fs_info, MIXED_GROUPS)) {
ret = __exclude_logged_extent(root, ins->objectid, ins->offset);
@@ -7901,8 +8027,9 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct extent_buffer *buf;
buf = btrfs_find_create_tree_block(root, bytenr);
- if (!buf)
- return ERR_PTR(-ENOMEM);
+ if (IS_ERR(buf))
+ return buf;
+
btrfs_set_header_generation(buf, trans->transid);
btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
btrfs_tree_lock(buf);
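Here and in do_walk_down() below the callers of btrfs_find_create_tree_block() are switched from a NULL check to IS_ERR()/PTR_ERR(), which suggests the helper now reports failures as ERR_PTR-encoded errors rather than a bare NULL. The adjusted calling convention, sketched for a hypothetical caller that returns an int:

	eb = btrfs_find_create_tree_block(root, bytenr);
	if (IS_ERR(eb))
		return PTR_ERR(eb);	/* e.g. -ENOMEM, instead of testing for NULL */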
@@ -7923,7 +8050,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
buf->start + buf->len - 1, GFP_NOFS);
else
set_extent_new(&root->dirty_log_pages, buf->start,
- buf->start + buf->len - 1, GFP_NOFS);
+ buf->start + buf->len - 1);
} else {
buf->log_index = -1;
set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
@@ -8544,8 +8671,9 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
next = btrfs_find_tree_block(root->fs_info, bytenr);
if (!next) {
next = btrfs_find_create_tree_block(root, bytenr);
- if (!next)
- return -ENOMEM;
+ if (IS_ERR(next))
+ return PTR_ERR(next);
+
btrfs_set_buffer_lockdep_class(root->root_key.objectid, next,
level - 1);
reada = 1;
@@ -9058,7 +9186,7 @@ out:
if (!for_reloc && root_dropped == false)
btrfs_add_dead_root(root);
if (err && err != -EAGAIN)
- btrfs_std_error(root->fs_info, err, NULL);
+ btrfs_handle_fs_error(root->fs_info, err, NULL);
return err;
}
@@ -9317,7 +9445,7 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
u64 free_bytes = 0;
int factor;
- /* It's df, we don't care if it's racey */
+ /* It's df, we don't care if it's racy */
if (list_empty(&sinfo->ro_bgs))
return 0;
@@ -10526,14 +10654,14 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
*/
mutex_lock(&fs_info->unused_bg_unpin_mutex);
ret = clear_extent_bits(&fs_info->freed_extents[0], start, end,
- EXTENT_DIRTY, GFP_NOFS);
+ EXTENT_DIRTY);
if (ret) {
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
btrfs_dec_block_group_ro(root, block_group);
goto end_trans;
}
ret = clear_extent_bits(&fs_info->freed_extents[1], start, end,
- EXTENT_DIRTY, GFP_NOFS);
+ EXTENT_DIRTY);
if (ret) {
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
btrfs_dec_block_group_ro(root, block_group);