summaryrefslogtreecommitdiff
path: root/fs/btrfs/volumes.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/volumes.c')
-rw-r--r--fs/btrfs/volumes.c617
1 files changed, 374 insertions, 243 deletions
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index bd0f45fb3..589f12817 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -20,13 +20,13 @@
#include <linux/slab.h>
#include <linux/buffer_head.h>
#include <linux/blkdev.h>
-#include <linux/random.h>
#include <linux/iocontext.h>
#include <linux/capability.h>
#include <linux/ratelimit.h>
#include <linux/kthread.h>
#include <linux/raid/pq.h>
#include <linux/semaphore.h>
+#include <linux/uuid.h>
#include <asm/div64.h>
#include "ctree.h"
#include "extent_map.h"
@@ -118,6 +118,21 @@ const u64 btrfs_raid_group[BTRFS_NR_RAID_TYPES] = {
[BTRFS_RAID_RAID6] = BTRFS_BLOCK_GROUP_RAID6,
};
+/*
+ * Table to convert BTRFS_RAID_* to the error code if minimum number of devices
+ * condition is not met. Zero means there's no corresponding
+ * BTRFS_ERROR_DEV_*_NOT_MET value.
+ */
+const int btrfs_raid_mindev_error[BTRFS_NR_RAID_TYPES] = {
+ [BTRFS_RAID_RAID10] = BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET,
+ [BTRFS_RAID_RAID1] = BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET,
+ [BTRFS_RAID_DUP] = 0,
+ [BTRFS_RAID_RAID0] = 0,
+ [BTRFS_RAID_SINGLE] = 0,
+ [BTRFS_RAID_RAID5] = BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET,
+ [BTRFS_RAID_RAID6] = BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET,
+};
+
static int init_first_rw_device(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_device *device);
@@ -699,7 +714,8 @@ static noinline int device_list_add(const char *path,
* if there is new btrfs on an already registered device,
* then remove the stale device entry.
*/
- btrfs_free_stale_device(device);
+ if (ret > 0)
+ btrfs_free_stale_device(device);
*fs_devices_ret = fs_devices;
@@ -988,6 +1004,56 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
return ret;
}
+void btrfs_release_disk_super(struct page *page)
+{
+ kunmap(page);
+ put_page(page);
+}
+
+int btrfs_read_disk_super(struct block_device *bdev, u64 bytenr,
+ struct page **page, struct btrfs_super_block **disk_super)
+{
+ void *p;
+ pgoff_t index;
+
+ /* make sure our super fits in the device */
+ if (bytenr + PAGE_SIZE >= i_size_read(bdev->bd_inode))
+ return 1;
+
+ /* make sure our super fits in the page */
+ if (sizeof(**disk_super) > PAGE_SIZE)
+ return 1;
+
+ /* make sure our super doesn't straddle pages on disk */
+ index = bytenr >> PAGE_SHIFT;
+ if ((bytenr + sizeof(**disk_super) - 1) >> PAGE_SHIFT != index)
+ return 1;
+
+ /* pull in the page with our super */
+ *page = read_cache_page_gfp(bdev->bd_inode->i_mapping,
+ index, GFP_KERNEL);
+
+ if (IS_ERR_OR_NULL(*page))
+ return 1;
+
+ p = kmap(*page);
+
+ /* align our pointer to the offset of the super block */
+ *disk_super = p + (bytenr & ~PAGE_MASK);
+
+ if (btrfs_super_bytenr(*disk_super) != bytenr ||
+ btrfs_super_magic(*disk_super) != BTRFS_MAGIC) {
+ btrfs_release_disk_super(*page);
+ return 1;
+ }
+
+ if ((*disk_super)->label[0] &&
+ (*disk_super)->label[BTRFS_LABEL_SIZE - 1])
+ (*disk_super)->label[BTRFS_LABEL_SIZE - 1] = '\0';
+
+ return 0;
+}
+
/*
* Look for a btrfs signature on a device. This may be called out of the mount path
* and we are not allowed to call set_blocksize during the scan. The superblock
@@ -999,13 +1065,11 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
struct btrfs_super_block *disk_super;
struct block_device *bdev;
struct page *page;
- void *p;
int ret = -EINVAL;
u64 devid;
u64 transid;
u64 total_devices;
u64 bytenr;
- pgoff_t index;
/*
* we would like to check all the supers, but that would make
@@ -1018,41 +1082,14 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
mutex_lock(&uuid_mutex);
bdev = blkdev_get_by_path(path, flags, holder);
-
if (IS_ERR(bdev)) {
ret = PTR_ERR(bdev);
goto error;
}
- /* make sure our super fits in the device */
- if (bytenr + PAGE_SIZE >= i_size_read(bdev->bd_inode))
- goto error_bdev_put;
-
- /* make sure our super fits in the page */
- if (sizeof(*disk_super) > PAGE_SIZE)
- goto error_bdev_put;
-
- /* make sure our super doesn't straddle pages on disk */
- index = bytenr >> PAGE_SHIFT;
- if ((bytenr + sizeof(*disk_super) - 1) >> PAGE_SHIFT != index)
- goto error_bdev_put;
-
- /* pull in the page with our super */
- page = read_cache_page_gfp(bdev->bd_inode->i_mapping,
- index, GFP_NOFS);
-
- if (IS_ERR_OR_NULL(page))
+ if (btrfs_read_disk_super(bdev, bytenr, &page, &disk_super))
goto error_bdev_put;
- p = kmap(page);
-
- /* align our pointer to the offset of the super block */
- disk_super = p + (bytenr & ~PAGE_MASK);
-
- if (btrfs_super_bytenr(disk_super) != bytenr ||
- btrfs_super_magic(disk_super) != BTRFS_MAGIC)
- goto error_unmap;
-
devid = btrfs_stack_device_id(&disk_super->dev_item);
transid = btrfs_super_generation(disk_super);
total_devices = btrfs_super_num_devices(disk_super);
@@ -1060,8 +1097,6 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
ret = device_list_add(path, disk_super, devid, fs_devices_ret);
if (ret > 0) {
if (disk_super->label[0]) {
- if (disk_super->label[BTRFS_LABEL_SIZE - 1])
- disk_super->label[BTRFS_LABEL_SIZE - 1] = '\0';
printk(KERN_INFO "BTRFS: device label %s ", disk_super->label);
} else {
printk(KERN_INFO "BTRFS: device fsid %pU ", disk_super->fsid);
@@ -1073,9 +1108,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
if (!ret && fs_devices_ret)
(*fs_devices_ret)->total_devices = total_devices;
-error_unmap:
- kunmap(page);
- put_page(page);
+ btrfs_release_disk_super(page);
error_bdev_put:
blkdev_put(bdev, flags);
@@ -1454,7 +1487,7 @@ again:
extent = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_dev_extent);
} else {
- btrfs_std_error(root->fs_info, ret, "Slot search failed");
+ btrfs_handle_fs_error(root->fs_info, ret, "Slot search failed");
goto out;
}
@@ -1462,7 +1495,7 @@ again:
ret = btrfs_del_item(trans, root, path);
if (ret) {
- btrfs_std_error(root->fs_info, ret,
+ btrfs_handle_fs_error(root->fs_info, ret,
"Failed to remove dev extent item");
} else {
set_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags);
@@ -1688,32 +1721,92 @@ out:
return ret;
}
-int btrfs_rm_device(struct btrfs_root *root, char *device_path)
+/*
+ * Verify that @num_devices satisfies the RAID profile constraints in the whole
+ * filesystem. It's up to the caller to adjust that number regarding eg. device
+ * replace.
+ */
+static int btrfs_check_raid_min_devices(struct btrfs_fs_info *fs_info,
+ u64 num_devices)
+{
+ u64 all_avail;
+ unsigned seq;
+ int i;
+
+ do {
+ seq = read_seqbegin(&fs_info->profiles_lock);
+
+ all_avail = fs_info->avail_data_alloc_bits |
+ fs_info->avail_system_alloc_bits |
+ fs_info->avail_metadata_alloc_bits;
+ } while (read_seqretry(&fs_info->profiles_lock, seq));
+
+ for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
+ if (!(all_avail & btrfs_raid_group[i]))
+ continue;
+
+ if (num_devices < btrfs_raid_array[i].devs_min) {
+ int ret = btrfs_raid_mindev_error[i];
+
+ if (ret)
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+struct btrfs_device *btrfs_find_next_active_device(struct btrfs_fs_devices *fs_devs,
+ struct btrfs_device *device)
{
- struct btrfs_device *device;
struct btrfs_device *next_device;
- struct block_device *bdev;
- struct buffer_head *bh = NULL;
- struct btrfs_super_block *disk_super;
+
+ list_for_each_entry(next_device, &fs_devs->devices, dev_list) {
+ if (next_device != device &&
+ !next_device->missing && next_device->bdev)
+ return next_device;
+ }
+
+ return NULL;
+}
+
+/*
+ * Helper function to check if the given device is part of s_bdev / latest_bdev
+ * and replace it with the provided or the next active device, in the context
+ * where this function called, there should be always be another device (or
+ * this_dev) which is active.
+ */
+void btrfs_assign_next_active_device(struct btrfs_fs_info *fs_info,
+ struct btrfs_device *device, struct btrfs_device *this_dev)
+{
+ struct btrfs_device *next_device;
+
+ if (this_dev)
+ next_device = this_dev;
+ else
+ next_device = btrfs_find_next_active_device(fs_info->fs_devices,
+ device);
+ ASSERT(next_device);
+
+ if (fs_info->sb->s_bdev &&
+ (fs_info->sb->s_bdev == device->bdev))
+ fs_info->sb->s_bdev = next_device->bdev;
+
+ if (fs_info->fs_devices->latest_bdev == device->bdev)
+ fs_info->fs_devices->latest_bdev = next_device->bdev;
+}
+
+int btrfs_rm_device(struct btrfs_root *root, char *device_path, u64 devid)
+{
+ struct btrfs_device *device;
struct btrfs_fs_devices *cur_devices;
- u64 all_avail;
- u64 devid;
u64 num_devices;
- u8 *dev_uuid;
- unsigned seq;
int ret = 0;
bool clear_super = false;
+ char *dev_name = NULL;
mutex_lock(&uuid_mutex);
- do {
- seq = read_seqbegin(&root->fs_info->profiles_lock);
-
- all_avail = root->fs_info->avail_data_alloc_bits |
- root->fs_info->avail_system_alloc_bits |
- root->fs_info->avail_metadata_alloc_bits;
- } while (read_seqretry(&root->fs_info->profiles_lock, seq));
-
num_devices = root->fs_info->fs_devices->num_devices;
btrfs_dev_replace_lock(&root->fs_info->dev_replace, 0);
if (btrfs_dev_replace_is_ongoing(&root->fs_info->dev_replace)) {
@@ -1722,78 +1815,23 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
}
btrfs_dev_replace_unlock(&root->fs_info->dev_replace, 0);
- if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && num_devices <= 4) {
- ret = BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET;
- goto out;
- }
-
- if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) && num_devices <= 2) {
- ret = BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET;
+ ret = btrfs_check_raid_min_devices(root->fs_info, num_devices - 1);
+ if (ret)
goto out;
- }
- if ((all_avail & BTRFS_BLOCK_GROUP_RAID5) &&
- root->fs_info->fs_devices->rw_devices <= 2) {
- ret = BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET;
- goto out;
- }
- if ((all_avail & BTRFS_BLOCK_GROUP_RAID6) &&
- root->fs_info->fs_devices->rw_devices <= 3) {
- ret = BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET;
+ ret = btrfs_find_device_by_devspec(root, devid, device_path,
+ &device);
+ if (ret)
goto out;
- }
-
- if (strcmp(device_path, "missing") == 0) {
- struct list_head *devices;
- struct btrfs_device *tmp;
-
- device = NULL;
- devices = &root->fs_info->fs_devices->devices;
- /*
- * It is safe to read the devices since the volume_mutex
- * is held.
- */
- list_for_each_entry(tmp, devices, dev_list) {
- if (tmp->in_fs_metadata &&
- !tmp->is_tgtdev_for_dev_replace &&
- !tmp->bdev) {
- device = tmp;
- break;
- }
- }
- bdev = NULL;
- bh = NULL;
- disk_super = NULL;
- if (!device) {
- ret = BTRFS_ERROR_DEV_MISSING_NOT_FOUND;
- goto out;
- }
- } else {
- ret = btrfs_get_bdev_and_sb(device_path,
- FMODE_WRITE | FMODE_EXCL,
- root->fs_info->bdev_holder, 0,
- &bdev, &bh);
- if (ret)
- goto out;
- disk_super = (struct btrfs_super_block *)bh->b_data;
- devid = btrfs_stack_device_id(&disk_super->dev_item);
- dev_uuid = disk_super->dev_item.uuid;
- device = btrfs_find_device(root->fs_info, devid, dev_uuid,
- disk_super->fsid);
- if (!device) {
- ret = -ENOENT;
- goto error_brelse;
- }
- }
if (device->is_tgtdev_for_dev_replace) {
ret = BTRFS_ERROR_DEV_TGT_REPLACE;
- goto error_brelse;
+ goto out;
}
if (device->writeable && root->fs_info->fs_devices->rw_devices == 1) {
ret = BTRFS_ERROR_DEV_ONLY_WRITABLE;
- goto error_brelse;
+ goto out;
}
if (device->writeable) {
@@ -1801,6 +1839,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
list_del_init(&device->dev_alloc_list);
device->fs_devices->rw_devices--;
unlock_chunks(root);
+ dev_name = kstrdup(device->name->str, GFP_KERNEL);
+ if (!dev_name) {
+ ret = -ENOMEM;
+ goto error_undo;
+ }
clear_super = true;
}
@@ -1842,12 +1885,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
if (device->missing)
device->fs_devices->missing_devices--;
- next_device = list_entry(root->fs_info->fs_devices->devices.next,
- struct btrfs_device, dev_list);
- if (device->bdev == root->fs_info->sb->s_bdev)
- root->fs_info->sb->s_bdev = next_device->bdev;
- if (device->bdev == root->fs_info->fs_devices->latest_bdev)
- root->fs_info->fs_devices->latest_bdev = next_device->bdev;
+ btrfs_assign_next_active_device(root->fs_info, device, NULL);
if (device->bdev) {
device->fs_devices->open_devices--;
@@ -1883,63 +1921,23 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
* at this point, the device is zero sized. We want to
* remove it from the devices list and zero out the old super
*/
- if (clear_super && disk_super) {
- u64 bytenr;
- int i;
-
- /* make sure this device isn't detected as part of
- * the FS anymore
- */
- memset(&disk_super->magic, 0, sizeof(disk_super->magic));
- set_buffer_dirty(bh);
- sync_dirty_buffer(bh);
-
- /* clear the mirror copies of super block on the disk
- * being removed, 0th copy is been taken care above and
- * the below would take of the rest
- */
- for (i = 1; i < BTRFS_SUPER_MIRROR_MAX; i++) {
- bytenr = btrfs_sb_offset(i);
- if (bytenr + BTRFS_SUPER_INFO_SIZE >=
- i_size_read(bdev->bd_inode))
- break;
-
- brelse(bh);
- bh = __bread(bdev, bytenr / 4096,
- BTRFS_SUPER_INFO_SIZE);
- if (!bh)
- continue;
-
- disk_super = (struct btrfs_super_block *)bh->b_data;
-
- if (btrfs_super_bytenr(disk_super) != bytenr ||
- btrfs_super_magic(disk_super) != BTRFS_MAGIC) {
- continue;
- }
- memset(&disk_super->magic, 0,
- sizeof(disk_super->magic));
- set_buffer_dirty(bh);
- sync_dirty_buffer(bh);
+ if (clear_super) {
+ struct block_device *bdev;
+
+ bdev = blkdev_get_by_path(dev_name, FMODE_READ | FMODE_EXCL,
+ root->fs_info->bdev_holder);
+ if (!IS_ERR(bdev)) {
+ btrfs_scratch_superblocks(bdev, dev_name);
+ blkdev_put(bdev, FMODE_READ | FMODE_EXCL);
}
}
- ret = 0;
-
- if (bdev) {
- /* Notify udev that device has changed */
- btrfs_kobject_uevent(bdev, KOBJ_CHANGE);
-
- /* Update ctime/mtime for device path for libblkid */
- update_dev_time(device_path);
- }
-
-error_brelse:
- brelse(bh);
- if (bdev)
- blkdev_put(bdev, FMODE_READ | FMODE_EXCL);
out:
+ kfree(dev_name);
+
mutex_unlock(&uuid_mutex);
return ret;
+
error_undo:
if (device->writeable) {
lock_chunks(root);
@@ -1948,7 +1946,7 @@ error_undo:
device->fs_devices->rw_devices++;
unlock_chunks(root);
}
- goto error_brelse;
+ goto out;
}
void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info,
@@ -1972,11 +1970,8 @@ void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info,
if (srcdev->missing)
fs_devices->missing_devices--;
- if (srcdev->writeable) {
+ if (srcdev->writeable)
fs_devices->rw_devices--;
- /* zero out the old super if it is writable */
- btrfs_scratch_superblocks(srcdev->bdev, srcdev->name->str);
- }
if (srcdev->bdev)
fs_devices->open_devices--;
@@ -1987,6 +1982,10 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info,
{
struct btrfs_fs_devices *fs_devices = srcdev->fs_devices;
+ if (srcdev->writeable) {
+ /* zero out the old super if it is writable */
+ btrfs_scratch_superblocks(srcdev->bdev, srcdev->name->str);
+ }
call_rcu(&srcdev->rcu, free_device);
/*
@@ -2016,32 +2015,33 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info,
void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
struct btrfs_device *tgtdev)
{
- struct btrfs_device *next_device;
-
mutex_lock(&uuid_mutex);
WARN_ON(!tgtdev);
mutex_lock(&fs_info->fs_devices->device_list_mutex);
btrfs_sysfs_rm_device_link(fs_info->fs_devices, tgtdev);
- if (tgtdev->bdev) {
- btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str);
+ if (tgtdev->bdev)
fs_info->fs_devices->open_devices--;
- }
+
fs_info->fs_devices->num_devices--;
- next_device = list_entry(fs_info->fs_devices->devices.next,
- struct btrfs_device, dev_list);
- if (tgtdev->bdev == fs_info->sb->s_bdev)
- fs_info->sb->s_bdev = next_device->bdev;
- if (tgtdev->bdev == fs_info->fs_devices->latest_bdev)
- fs_info->fs_devices->latest_bdev = next_device->bdev;
- list_del_rcu(&tgtdev->dev_list);
+ btrfs_assign_next_active_device(fs_info, tgtdev, NULL);
- call_rcu(&tgtdev->rcu, free_device);
+ list_del_rcu(&tgtdev->dev_list);
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
mutex_unlock(&uuid_mutex);
+
+ /*
+ * The update_dev_time() with in btrfs_scratch_superblocks()
+ * may lead to a call to btrfs_show_devname() which will try
+ * to hold device_list_mutex. And here this device
+ * is already out of device list, so we don't have to hold
+ * the device_list_mutex lock.
+ */
+ btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str);
+ call_rcu(&tgtdev->rcu, free_device);
}
static int btrfs_find_device_by_path(struct btrfs_root *root, char *device_path,
@@ -2102,6 +2102,31 @@ int btrfs_find_device_missing_or_by_path(struct btrfs_root *root,
}
/*
+ * Lookup a device given by device id, or the path if the id is 0.
+ */
+int btrfs_find_device_by_devspec(struct btrfs_root *root, u64 devid,
+ char *devpath,
+ struct btrfs_device **device)
+{
+ int ret;
+
+ if (devid) {
+ ret = 0;
+ *device = btrfs_find_device(root->fs_info, devid, NULL,
+ NULL);
+ if (!*device)
+ ret = -ENOENT;
+ } else {
+ if (!devpath || !devpath[0])
+ return -EINVAL;
+
+ ret = btrfs_find_device_missing_or_by_path(root, devpath,
+ device);
+ }
+ return ret;
+}
+
+/*
* does all the dirty work required for changing file system's UUID.
*/
static int btrfs_prepare_sprout(struct btrfs_root *root)
@@ -2165,7 +2190,7 @@ static int btrfs_prepare_sprout(struct btrfs_root *root)
}
/*
- * strore the expected generation for seed devices in device items.
+ * Store the expected generation for seed devices in device items.
*/
static int btrfs_finish_sprout(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
@@ -2418,7 +2443,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
ret = btrfs_relocate_sys_chunks(root);
if (ret < 0)
- btrfs_std_error(root->fs_info, ret,
+ btrfs_handle_fs_error(root->fs_info, ret,
"Failed to relocate sys chunks after "
"device initialization. This can be fixed "
"using the \"btrfs balance\" command.");
@@ -2663,7 +2688,7 @@ static int btrfs_free_chunk(struct btrfs_trans_handle *trans,
if (ret < 0)
goto out;
else if (ret > 0) { /* Logic error or corruption */
- btrfs_std_error(root->fs_info, -ENOENT,
+ btrfs_handle_fs_error(root->fs_info, -ENOENT,
"Failed lookup while freeing chunk.");
ret = -ENOENT;
goto out;
@@ -2671,7 +2696,7 @@ static int btrfs_free_chunk(struct btrfs_trans_handle *trans,
ret = btrfs_del_item(trans, root, path);
if (ret < 0)
- btrfs_std_error(root->fs_info, ret,
+ btrfs_handle_fs_error(root->fs_info, ret,
"Failed to delete chunk item.");
out:
btrfs_free_path(path);
@@ -2736,6 +2761,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
u64 dev_extent_len = 0;
u64 chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
int i, ret = 0;
+ struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
/* Just in case */
root = root->fs_info->chunk_root;
@@ -2762,12 +2788,19 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
check_system_chunk(trans, extent_root, map->type);
unlock_chunks(root->fs_info->chunk_root);
+ /*
+ * Take the device list mutex to prevent races with the final phase of
+ * a device replace operation that replaces the device object associated
+ * with map stripes (dev-replace.c:btrfs_dev_replace_finishing()).
+ */
+ mutex_lock(&fs_devices->device_list_mutex);
for (i = 0; i < map->num_stripes; i++) {
struct btrfs_device *device = map->stripes[i].dev;
ret = btrfs_free_dev_extent(trans, device,
map->stripes[i].physical,
&dev_extent_len);
if (ret) {
+ mutex_unlock(&fs_devices->device_list_mutex);
btrfs_abort_transaction(trans, root, ret);
goto out;
}
@@ -2786,11 +2819,14 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
if (map->stripes[i].dev) {
ret = btrfs_update_device(trans, map->stripes[i].dev);
if (ret) {
+ mutex_unlock(&fs_devices->device_list_mutex);
btrfs_abort_transaction(trans, root, ret);
goto out;
}
}
}
+ mutex_unlock(&fs_devices->device_list_mutex);
+
ret = btrfs_free_chunk(trans, root, chunk_objectid, chunk_offset);
if (ret) {
btrfs_abort_transaction(trans, root, ret);
@@ -2857,7 +2893,7 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, u64 chunk_offset)
chunk_offset);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
- btrfs_std_error(root->fs_info, ret, NULL);
+ btrfs_handle_fs_error(root->fs_info, ret, NULL);
return ret;
}
@@ -3362,7 +3398,7 @@ static int should_balance_chunk(struct btrfs_root *root,
} else if ((bargs->flags & BTRFS_BALANCE_ARGS_LIMIT_RANGE)) {
/*
* Same logic as the 'limit' filter; the minimum cannot be
- * determined here because we do not have the global informatoin
+ * determined here because we do not have the global information
* about the count of all chunks that satisfy the filters.
*/
if (bargs->limit_max == 0)
@@ -3402,6 +3438,7 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
u32 count_meta = 0;
u32 count_sys = 0;
int chunk_reserved = 0;
+ u64 bytes_used = 0;
/* step one make some room on all the devices */
devices = &fs_info->fs_devices->devices;
@@ -3540,7 +3577,13 @@ again:
goto loop;
}
- if ((chunk_type & BTRFS_BLOCK_GROUP_DATA) && !chunk_reserved) {
+ ASSERT(fs_info->data_sinfo);
+ spin_lock(&fs_info->data_sinfo->lock);
+ bytes_used = fs_info->data_sinfo->bytes_used;
+ spin_unlock(&fs_info->data_sinfo->lock);
+
+ if ((chunk_type & BTRFS_BLOCK_GROUP_DATA) &&
+ !chunk_reserved && !bytes_used) {
trans = btrfs_start_transaction(chunk_root, 0);
if (IS_ERR(trans)) {
mutex_unlock(&fs_info->delete_unused_bgs_mutex);
@@ -3632,7 +3675,7 @@ static void __cancel_balance(struct btrfs_fs_info *fs_info)
unset_balance_control(fs_info);
ret = del_balance_item(fs_info->tree_root);
if (ret)
- btrfs_std_error(fs_info, ret, NULL);
+ btrfs_handle_fs_error(fs_info, ret, NULL);
atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
}
@@ -3693,10 +3736,8 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
num_devices--;
}
btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
- allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
- if (num_devices == 1)
- allowed |= BTRFS_BLOCK_GROUP_DUP;
- else if (num_devices > 1)
+ allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE | BTRFS_BLOCK_GROUP_DUP;
+ if (num_devices > 1)
allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1);
if (num_devices > 2)
allowed |= BTRFS_BLOCK_GROUP_RAID5;
@@ -4200,6 +4241,7 @@ int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info)
if (IS_ERR(uuid_root)) {
ret = PTR_ERR(uuid_root);
btrfs_abort_transaction(trans, tree_root, ret);
+ btrfs_end_transaction(trans, tree_root);
return ret;
}
@@ -4652,12 +4694,12 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
if (type & BTRFS_BLOCK_GROUP_RAID5) {
raid_stripe_len = find_raid56_stripe_len(ndevs - 1,
- btrfs_super_stripesize(info->super_copy));
+ extent_root->stripesize);
data_stripes = num_stripes - 1;
}
if (type & BTRFS_BLOCK_GROUP_RAID6) {
raid_stripe_len = find_raid56_stripe_len(ndevs - 2,
- btrfs_super_stripesize(info->super_copy));
+ extent_root->stripesize);
data_stripes = num_stripes - 2;
}
@@ -5278,7 +5320,15 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
stripe_nr = div64_u64(stripe_nr, stripe_len);
stripe_offset = stripe_nr * stripe_len;
- BUG_ON(offset < stripe_offset);
+ if (offset < stripe_offset) {
+ btrfs_crit(fs_info, "stripe math has gone wrong, "
+ "stripe_offset=%llu, offset=%llu, start=%llu, "
+ "logical=%llu, stripe_len=%llu",
+ stripe_offset, offset, em->start, logical,
+ stripe_len);
+ free_extent_map(em);
+ return -EINVAL;
+ }
/* stripe_offset is the offset of this block in its stripe*/
stripe_offset = offset - stripe_offset;
@@ -5519,7 +5569,13 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
&stripe_index);
mirror_num = stripe_index + 1;
}
- BUG_ON(stripe_index >= map->num_stripes);
+ if (stripe_index >= map->num_stripes) {
+ btrfs_crit(fs_info, "stripe index math went horribly wrong, "
+ "got stripe_index=%u, num_stripes=%u",
+ stripe_index, map->num_stripes);
+ ret = -EINVAL;
+ goto out;
+ }
num_alloc_stripes = num_stripes;
if (dev_replace_is_ongoing) {
@@ -5718,20 +5774,17 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
}
}
if (found) {
- if (physical_of_found + map->stripe_len <=
- dev_replace->cursor_left) {
- struct btrfs_bio_stripe *tgtdev_stripe =
- bbio->stripes + num_stripes;
+ struct btrfs_bio_stripe *tgtdev_stripe =
+ bbio->stripes + num_stripes;
- tgtdev_stripe->physical = physical_of_found;
- tgtdev_stripe->length =
- bbio->stripes[index_srcdev].length;
- tgtdev_stripe->dev = dev_replace->tgtdev;
- bbio->tgtdev_map[index_srcdev] = num_stripes;
+ tgtdev_stripe->physical = physical_of_found;
+ tgtdev_stripe->length =
+ bbio->stripes[index_srcdev].length;
+ tgtdev_stripe->dev = dev_replace->tgtdev;
+ bbio->tgtdev_map[index_srcdev] = num_stripes;
- tgtdev_indexes++;
- num_stripes++;
- }
+ tgtdev_indexes++;
+ num_stripes++;
}
}
@@ -6032,7 +6085,7 @@ static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)
{
atomic_inc(&bbio->error);
if (atomic_dec_and_test(&bbio->stripes_pending)) {
- /* Shoud be the original bio. */
+ /* Should be the original bio. */
WARN_ON(bio != bbio->orig_bio);
btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
@@ -6206,27 +6259,23 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
return dev;
}
-static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
- struct extent_buffer *leaf,
- struct btrfs_chunk *chunk)
+/* Return -EIO if any error, otherwise return 0. */
+static int btrfs_check_chunk_valid(struct btrfs_root *root,
+ struct extent_buffer *leaf,
+ struct btrfs_chunk *chunk, u64 logical)
{
- struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
- struct map_lookup *map;
- struct extent_map *em;
- u64 logical;
u64 length;
u64 stripe_len;
- u64 devid;
- u8 uuid[BTRFS_UUID_SIZE];
- int num_stripes;
- int ret;
- int i;
+ u16 num_stripes;
+ u16 sub_stripes;
+ u64 type;
- logical = key->offset;
length = btrfs_chunk_length(leaf, chunk);
stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
- /* Validation check */
+ sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
+ type = btrfs_chunk_type(leaf, chunk);
+
if (!num_stripes) {
btrfs_err(root->fs_info, "invalid chunk num_stripes: %u",
num_stripes);
@@ -6237,24 +6286,70 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
"invalid chunk logical %llu", logical);
return -EIO;
}
+ if (btrfs_chunk_sector_size(leaf, chunk) != root->sectorsize) {
+ btrfs_err(root->fs_info, "invalid chunk sectorsize %u",
+ btrfs_chunk_sector_size(leaf, chunk));
+ return -EIO;
+ }
if (!length || !IS_ALIGNED(length, root->sectorsize)) {
btrfs_err(root->fs_info,
"invalid chunk length %llu", length);
return -EIO;
}
- if (!is_power_of_2(stripe_len)) {
+ if (!is_power_of_2(stripe_len) || stripe_len != BTRFS_STRIPE_LEN) {
btrfs_err(root->fs_info, "invalid chunk stripe length: %llu",
stripe_len);
return -EIO;
}
if (~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) &
- btrfs_chunk_type(leaf, chunk)) {
+ type) {
btrfs_err(root->fs_info, "unrecognized chunk type: %llu",
~(BTRFS_BLOCK_GROUP_TYPE_MASK |
BTRFS_BLOCK_GROUP_PROFILE_MASK) &
btrfs_chunk_type(leaf, chunk));
return -EIO;
}
+ if ((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) ||
+ (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes < 1) ||
+ (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) ||
+ (type & BTRFS_BLOCK_GROUP_RAID6 && num_stripes < 3) ||
+ (type & BTRFS_BLOCK_GROUP_DUP && num_stripes > 2) ||
+ ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 &&
+ num_stripes != 1)) {
+ btrfs_err(root->fs_info,
+ "invalid num_stripes:sub_stripes %u:%u for profile %llu",
+ num_stripes, sub_stripes,
+ type & BTRFS_BLOCK_GROUP_PROFILE_MASK);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
+ struct extent_buffer *leaf,
+ struct btrfs_chunk *chunk)
+{
+ struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
+ struct map_lookup *map;
+ struct extent_map *em;
+ u64 logical;
+ u64 length;
+ u64 stripe_len;
+ u64 devid;
+ u8 uuid[BTRFS_UUID_SIZE];
+ int num_stripes;
+ int ret;
+ int i;
+
+ logical = key->offset;
+ length = btrfs_chunk_length(leaf, chunk);
+ stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
+ num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
+
+ ret = btrfs_check_chunk_valid(root, leaf, chunk, logical);
+ if (ret)
+ return ret;
read_lock(&map_tree->map_tree.lock);
em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);
@@ -6502,6 +6597,7 @@ int btrfs_read_sys_array(struct btrfs_root *root)
u32 array_size;
u32 len = 0;
u32 cur_offset;
+ u64 type;
struct btrfs_key key;
ASSERT(BTRFS_SUPER_INFO_SIZE <= root->nodesize);
@@ -6511,12 +6607,12 @@ int btrfs_read_sys_array(struct btrfs_root *root)
* overallocate but we can keep it as-is, only the first page is used.
*/
sb = btrfs_find_create_tree_block(root, BTRFS_SUPER_INFO_OFFSET);
- if (!sb)
- return -ENOMEM;
+ if (IS_ERR(sb))
+ return PTR_ERR(sb);
set_extent_buffer_uptodate(sb);
btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0);
/*
- * The sb extent buffer is artifical and just used to read the system array.
+ * The sb extent buffer is artificial and just used to read the system array.
* set_extent_buffer_uptodate() call does not properly mark all it's
* pages up-to-date when the page is larger: extent does not cover the
* whole page and consequently check_page_uptodate does not find all
@@ -6568,6 +6664,15 @@ int btrfs_read_sys_array(struct btrfs_root *root)
break;
}
+ type = btrfs_chunk_type(sb, chunk);
+ if ((type & BTRFS_BLOCK_GROUP_SYSTEM) == 0) {
+ btrfs_err(root->fs_info,
+ "invalid chunk type %llu in sys_array at offset %u",
+ type, cur_offset);
+ ret = -EIO;
+ break;
+ }
+
len = btrfs_chunk_item_size(num_stripes);
if (cur_offset + len > array_size)
goto out_short_read;
@@ -6586,13 +6691,15 @@ int btrfs_read_sys_array(struct btrfs_root *root)
sb_array_offset += len;
cur_offset += len;
}
- free_extent_buffer(sb);
+ clear_extent_buffer_uptodate(sb);
+ free_extent_buffer_stale(sb);
return ret;
out_short_read:
printk(KERN_ERR "BTRFS: sys_array too short to read %u bytes at offset %u\n",
len, cur_offset);
- free_extent_buffer(sb);
+ clear_extent_buffer_uptodate(sb);
+ free_extent_buffer_stale(sb);
return -EIO;
}
@@ -6604,6 +6711,7 @@ int btrfs_read_chunk_tree(struct btrfs_root *root)
struct btrfs_key found_key;
int ret;
int slot;
+ u64 total_dev = 0;
root = root->fs_info->chunk_root;
@@ -6645,6 +6753,7 @@ int btrfs_read_chunk_tree(struct btrfs_root *root)
ret = read_one_dev(root, leaf, dev_item);
if (ret)
goto error;
+ total_dev++;
} else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
struct btrfs_chunk *chunk;
chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
@@ -6654,6 +6763,28 @@ int btrfs_read_chunk_tree(struct btrfs_root *root)
}
path->slots[0]++;
}
+
+ /*
+ * After loading chunk tree, we've got all device information,
+ * do another round of validation checks.
+ */
+ if (total_dev != root->fs_info->fs_devices->total_devices) {
+ btrfs_err(root->fs_info,
+ "super_num_devices %llu mismatch with num_devices %llu found here",
+ btrfs_super_num_devices(root->fs_info->super_copy),
+ total_dev);
+ ret = -EINVAL;
+ goto error;
+ }
+ if (btrfs_super_total_bytes(root->fs_info->super_copy) <
+ root->fs_info->fs_devices->total_rw_bytes) {
+ btrfs_err(root->fs_info,
+ "super_total_bytes %llu mismatch with fs_devices total_rw_bytes %llu",
+ btrfs_super_total_bytes(root->fs_info->super_copy),
+ root->fs_info->fs_devices->total_rw_bytes);
+ ret = -EINVAL;
+ goto error;
+ }
ret = 0;
error:
unlock_chunks(root);