Diffstat (limited to 'drivers/md/md.c')
-rw-r--r--	drivers/md/md.c	164
1 file changed, 88 insertions, 76 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index e25f00f01..3fe3d04a9 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -257,13 +257,17 @@ static void md_make_request(struct request_queue *q, struct bio *bio)
unsigned int sectors;
int cpu;
+ blk_queue_split(q, &bio, q->bio_split);
+
if (mddev == NULL || mddev->pers == NULL
|| !mddev->ready) {
bio_io_error(bio);
return;
}
if (mddev->ro == 1 && unlikely(rw == WRITE)) {
- bio_endio(bio, bio_sectors(bio) == 0 ? 0 : -EROFS);
+ if (bio_sectors(bio) != 0)
+ bio->bi_error = -EROFS;
+ bio_endio(bio);
return;
}
smp_rmb(); /* Ensure implications of 'active' are visible */
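Both edits in this hunk track the same block-layer API change: bio_endio() has lost its error argument, and completion status now travels in the bio itself as bio->bi_error. A minimal sketch of the new convention (hypothetical helper, not part of this patch):

	static void my_complete(struct bio *bio, int err)
	{
		if (err)
			bio->bi_error = err;	/* status is recorded in the bio */
		bio_endio(bio);			/* new signature: no error parameter */
	}

The blk_queue_split() call added at the top has the block layer split any bio that exceeds the queue's limits, which is what makes the mergeable_bvec machinery removed below unnecessary.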
@@ -350,34 +354,11 @@ static int md_congested(void *data, int bits)
return mddev_congested(mddev, bits);
}
-static int md_mergeable_bvec(struct request_queue *q,
- struct bvec_merge_data *bvm,
- struct bio_vec *biovec)
-{
- struct mddev *mddev = q->queuedata;
- int ret;
- rcu_read_lock();
- if (mddev->suspended) {
- /* Must always allow one vec */
- if (bvm->bi_size == 0)
- ret = biovec->bv_len;
- else
- ret = 0;
- } else {
- struct md_personality *pers = mddev->pers;
- if (pers && pers->mergeable_bvec)
- ret = pers->mergeable_bvec(mddev, bvm, biovec);
- else
- ret = biovec->bv_len;
- }
- rcu_read_unlock();
- return ret;
-}
/*
* Generic flush handling for md
*/
-static void md_end_flush(struct bio *bio, int err)
+static void md_end_flush(struct bio *bio)
{
struct md_rdev *rdev = bio->bi_private;
struct mddev *mddev = rdev->mddev;
@@ -433,7 +414,7 @@ static void md_submit_flush_data(struct work_struct *ws)
if (bio->bi_iter.bi_size == 0)
/* an empty barrier - all done */
- bio_endio(bio, 0);
+ bio_endio(bio);
else {
bio->bi_rw &= ~REQ_FLUSH;
mddev->pers->make_request(mddev, bio);
@@ -502,6 +483,8 @@ static void mddev_put(struct mddev *mddev)
bioset_free(bs);
}
+static void md_safemode_timeout(unsigned long data);
+
void mddev_init(struct mddev *mddev)
{
mutex_init(&mddev->open_mutex);
@@ -509,7 +492,8 @@ void mddev_init(struct mddev *mddev)
mutex_init(&mddev->bitmap_info.mutex);
INIT_LIST_HEAD(&mddev->disks);
INIT_LIST_HEAD(&mddev->all_mddevs);
- init_timer(&mddev->safemode_timer);
+ setup_timer(&mddev->safemode_timer, md_safemode_timeout,
+ (unsigned long) mddev);
atomic_set(&mddev->active, 1);
atomic_set(&mddev->openers, 0);
atomic_set(&mddev->active_io, 0);
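setup_timer() here bundles init_timer() together with the two by-hand field assignments that md_run() used to perform (removed further down). A sketch of the equivalence, assuming the pre-4.15 timer API used in this file:

	/* These two forms set up the same timer: */
	setup_timer(&mddev->safemode_timer, md_safemode_timeout,
		    (unsigned long) mddev);

	init_timer(&mddev->safemode_timer);
	mddev->safemode_timer.function = md_safemode_timeout;
	mddev->safemode_timer.data = (unsigned long) mddev;

The md_safemode_timeout() forward declaration added just above mddev_init() is needed because the function is defined later in the file.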
@@ -728,15 +712,13 @@ void md_rdev_clear(struct md_rdev *rdev)
}
EXPORT_SYMBOL_GPL(md_rdev_clear);
-static void super_written(struct bio *bio, int error)
+static void super_written(struct bio *bio)
{
struct md_rdev *rdev = bio->bi_private;
struct mddev *mddev = rdev->mddev;
- if (error || !test_bit(BIO_UPTODATE, &bio->bi_flags)) {
- printk("md: super_written gets error=%d, uptodate=%d\n",
- error, test_bit(BIO_UPTODATE, &bio->bi_flags));
- WARN_ON(test_bit(BIO_UPTODATE, &bio->bi_flags));
+ if (bio->bi_error) {
+ printk("md: super_written gets error=%d\n", bio->bi_error);
md_error(mddev, rdev);
}
@@ -791,7 +773,7 @@ int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
bio_add_page(bio, page, size, 0);
submit_bio_wait(rw, bio);
- ret = test_bit(BIO_UPTODATE, &bio->bi_flags);
+ ret = !bio->bi_error;
bio_put(bio);
return ret;
}
@@ -3276,8 +3258,6 @@ int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale)
return 0;
}
-static void md_safemode_timeout(unsigned long data);
-
static ssize_t
safe_delay_show(struct mddev *mddev, char *page)
{
@@ -4210,6 +4190,8 @@ action_show(struct mddev *mddev, char *page)
type = "repair";
} else if (test_bit(MD_RECOVERY_RECOVER, &recovery))
type = "recover";
+ else if (mddev->reshape_position != MaxSector)
+ type = "reshape";
}
return sprintf(page, "%s\n", type);
}
@@ -5186,7 +5168,6 @@ int md_run(struct mddev *mddev)
if (mddev->queue) {
mddev->queue->backing_dev_info.congested_data = mddev;
mddev->queue->backing_dev_info.congested_fn = md_congested;
- blk_queue_merge_bvec(mddev->queue, md_mergeable_bvec);
}
if (pers->sync_request) {
if (mddev->kobj.sd &&
@@ -5202,8 +5183,6 @@ int md_run(struct mddev *mddev)
atomic_set(&mddev->max_corr_read_errors,
MD_DEFAULT_MAX_CORRECTED_READ_ERRORS);
mddev->safemode = 0;
- mddev->safemode_timer.function = md_safemode_timeout;
- mddev->safemode_timer.data = (unsigned long) mddev;
mddev->safemode_delay = (200 * HZ)/1000 +1; /* 200 msec delay */
mddev->in_sync = 1;
smp_wmb();
@@ -5216,6 +5195,11 @@ int md_run(struct mddev *mddev)
if (sysfs_link_rdev(mddev, rdev))
/* failure here is OK */;
+ if (mddev->degraded && !mddev->ro)
+ /* This ensures that recovering status is reported immediately
+ * via sysfs - until a lack of spares is confirmed.
+ */
+ set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
if (mddev->flags & MD_UPDATE_SB_FLAGS)
@@ -5315,7 +5299,6 @@ static void md_clean(struct mddev *mddev)
mddev->degraded = 0;
mddev->safemode = 0;
mddev->private = NULL;
- mddev->merge_check_needed = 0;
mddev->bitmap_info.offset = 0;
mddev->bitmap_info.default_offset = 0;
mddev->bitmap_info.default_space = 0;
@@ -5426,9 +5409,13 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
* which will now never happen */
wake_up_process(mddev->sync_thread->tsk);
+ if (mddev->external && test_bit(MD_CHANGE_PENDING, &mddev->flags))
+ return -EBUSY;
mddev_unlock(mddev);
wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
&mddev->recovery));
+ wait_event(mddev->sb_wait,
+ !test_bit(MD_CHANGE_PENDING, &mddev->flags));
mddev_lock_nointr(mddev);
mutex_lock(&mddev->open_mutex);
@@ -5514,7 +5501,6 @@ static int do_md_stop(struct mddev *mddev, int mode,
__md_stop_writes(mddev);
__md_stop(mddev);
- mddev->queue->merge_bvec_fn = NULL;
mddev->queue->backing_dev_info.congested_fn = NULL;
/* tell userspace to handle 'inactive' */
@@ -5765,16 +5751,16 @@ static int get_bitmap_file(struct mddev *mddev, void __user * arg)
err = 0;
spin_lock(&mddev->lock);
- /* bitmap disabled, zero the first byte and copy out */
- if (!mddev->bitmap_info.file)
- file->pathname[0] = '\0';
- else if ((ptr = file_path(mddev->bitmap_info.file,
- file->pathname, sizeof(file->pathname))),
- IS_ERR(ptr))
- err = PTR_ERR(ptr);
- else
- memmove(file->pathname, ptr,
- sizeof(file->pathname)-(ptr-file->pathname));
+ /* bitmap enabled */
+ if (mddev->bitmap_info.file) {
+ ptr = file_path(mddev->bitmap_info.file, file->pathname,
+ sizeof(file->pathname));
+ if (IS_ERR(ptr))
+ err = PTR_ERR(ptr);
+ else
+ memmove(file->pathname, ptr,
+ sizeof(file->pathname)-(ptr-file->pathname));
+ }
spin_unlock(&mddev->lock);
if (err == 0 &&
@@ -7093,7 +7079,7 @@ static void status_unused(struct seq_file *seq)
seq_printf(seq, "\n");
}
-static void status_resync(struct seq_file *seq, struct mddev *mddev)
+static int status_resync(struct seq_file *seq, struct mddev *mddev)
{
sector_t max_sectors, resync, res;
unsigned long dt, db;
@@ -7101,18 +7087,32 @@ static void status_resync(struct seq_file *seq, struct mddev *mddev)
int scale;
unsigned int per_milli;
- if (mddev->curr_resync <= 3)
- resync = 0;
- else
- resync = mddev->curr_resync
- - atomic_read(&mddev->recovery_active);
-
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
max_sectors = mddev->resync_max_sectors;
else
max_sectors = mddev->dev_sectors;
+ resync = mddev->curr_resync;
+ if (resync <= 3) {
+ if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
+ /* Still cleaning up */
+ resync = max_sectors;
+ } else
+ resync -= atomic_read(&mddev->recovery_active);
+
+ if (resync == 0) {
+ if (mddev->recovery_cp < MaxSector) {
+ seq_printf(seq, "\tresync=PENDING");
+ return 1;
+ }
+ return 0;
+ }
+ if (resync < 3) {
+ seq_printf(seq, "\tresync=DELAYED");
+ return 1;
+ }
+
WARN_ON(max_sectors == 0);
/* Pick 'scale' such that (resync>>scale)*1000 will fit
* in a sector_t, and (max_sectors>>scale) will fit in a
@@ -7177,6 +7177,7 @@ static void status_resync(struct seq_file *seq, struct mddev *mddev)
((unsigned long)rt % 60)/6);
seq_printf(seq, " speed=%ldK/sec", db/2/dt);
+ return 1;
}
static void *md_seq_start(struct seq_file *seq, loff_t *pos)
@@ -7322,13 +7323,8 @@ static int md_seq_show(struct seq_file *seq, void *v)
mddev->pers->status(seq, mddev);
seq_printf(seq, "\n ");
if (mddev->pers->sync_request) {
- if (mddev->curr_resync > 2) {
- status_resync(seq, mddev);
+ if (status_resync(seq, mddev))
seq_printf(seq, "\n ");
- } else if (mddev->curr_resync >= 1)
- seq_printf(seq, "\tresync=DELAYED\n ");
- else if (mddev->recovery_cp < MaxSector)
- seq_printf(seq, "\tresync=PENDING\n ");
}
} else
seq_printf(seq, "\n ");
@@ -7411,15 +7407,19 @@ int unregister_md_personality(struct md_personality *p)
}
EXPORT_SYMBOL(unregister_md_personality);
-int register_md_cluster_operations(struct md_cluster_operations *ops, struct module *module)
+int register_md_cluster_operations(struct md_cluster_operations *ops,
+ struct module *module)
{
- if (md_cluster_ops != NULL)
- return -EALREADY;
+ int ret = 0;
spin_lock(&pers_lock);
- md_cluster_ops = ops;
- md_cluster_mod = module;
+ if (md_cluster_ops != NULL)
+ ret = -EALREADY;
+ else {
+ md_cluster_ops = ops;
+ md_cluster_mod = module;
+ }
spin_unlock(&pers_lock);
- return 0;
+ return ret;
}
EXPORT_SYMBOL(register_md_cluster_operations);
@@ -7817,7 +7817,8 @@ void md_do_sync(struct md_thread *thread)
> (max_sectors >> 4)) ||
time_after_eq(jiffies, update_time + UPDATE_FREQUENCY) ||
(j - mddev->curr_resync_completed)*2
- >= mddev->resync_max - mddev->curr_resync_completed
+ >= mddev->resync_max - mddev->curr_resync_completed ||
+ mddev->curr_resync_completed > mddev->resync_max
)) {
/* time to update curr_resync_completed */
wait_event(mddev->recovery_wait,
@@ -7862,6 +7863,9 @@ void md_do_sync(struct md_thread *thread)
break;
j += sectors;
+ if (j > max_sectors)
+ /* when skipping, extra large numbers can be returned. */
+ j = max_sectors;
if (j > 2)
mddev->curr_resync = j;
if (mddev_is_clustered(mddev))
@@ -7930,12 +7934,15 @@ void md_do_sync(struct md_thread *thread)
blk_finish_plug(&plug);
wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
+ if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
+ !test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
+ mddev->curr_resync > 2) {
+ mddev->curr_resync_completed = mddev->curr_resync;
+ sysfs_notify(&mddev->kobj, NULL, "sync_completed");
+ }
/* tell personality that we are finished */
mddev->pers->sync_request(mddev, max_sectors, &skipped);
- if (mddev_is_clustered(mddev))
- md_cluster_ops->resync_finish(mddev);
-
if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
mddev->curr_resync > 2) {
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
@@ -7969,6 +7976,9 @@ void md_do_sync(struct md_thread *thread)
}
}
skip:
+ if (mddev_is_clustered(mddev))
+ md_cluster_ops->resync_finish(mddev);
+
set_bit(MD_CHANGE_DEVS, &mddev->flags);
spin_lock(&mddev->lock);
@@ -7979,11 +7989,11 @@ void md_do_sync(struct md_thread *thread)
mddev->resync_max = MaxSector;
} else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
mddev->resync_min = mddev->curr_resync_completed;
+ set_bit(MD_RECOVERY_DONE, &mddev->recovery);
mddev->curr_resync = 0;
spin_unlock(&mddev->lock);
wake_up(&resync_wait);
- set_bit(MD_RECOVERY_DONE, &mddev->recovery);
md_wakeup_thread(mddev->thread);
return;
}
@@ -8030,8 +8040,7 @@ static int remove_and_add_spares(struct mddev *mddev,
!test_bit(Bitmap_sync, &rdev->flags)))
continue;
- if (rdev->saved_raid_disk < 0)
- rdev->recovery_offset = 0;
+ rdev->recovery_offset = 0;
if (mddev->pers->
hot_add_disk(mddev, rdev) == 0) {
if (sysfs_link_rdev(mddev, rdev))
@@ -8152,7 +8161,9 @@ void md_check_recovery(struct mddev *mddev)
*/
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
md_reap_sync_thread(mddev);
+ clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+ clear_bit(MD_CHANGE_PENDING, &mddev->flags);
goto unlock;
}
@@ -8598,6 +8609,7 @@ int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
/* Make sure they get written out promptly */
sysfs_notify_dirent_safe(rdev->sysfs_state);
set_bit(MD_CHANGE_CLEAN, &rdev->mddev->flags);
+ set_bit(MD_CHANGE_PENDING, &rdev->mddev->flags);
md_wakeup_thread(rdev->mddev->thread);
}
return rv;