summary refs log tree commit diff
path: root/drivers/md/dm-mpath.c
diff options
context:
space:
mode:
author André Fabian Silva Delgado <emulatorman@parabola.nu> 2016-10-20 00:10:27 -0300
committer André Fabian Silva Delgado <emulatorman@parabola.nu> 2016-10-20 00:10:27 -0300
commit d0b2f91bede3bd5e3d24dd6803e56eee959c1797 (patch)
tree 7fee4ab0509879c373c4f2cbd5b8a5be5b4041ee /drivers/md/dm-mpath.c
parent e914f8eb445e8f74b00303c19c2ffceaedd16a05 (diff)
Linux-libre 4.8.2-gnu pck-4.8.2-gnu
Diffstat (limited to 'drivers/md/dm-mpath.c')
-rw-r--r-- drivers/md/dm-mpath.c 374
1 files changed, 336 insertions, 38 deletions
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 52baf8a5b..ac734e5bb 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -7,7 +7,8 @@
#include <linux/device-mapper.h>
-#include "dm.h"
+#include "dm-rq.h"
+#include "dm-bio-record.h"
#include "dm-path-selector.h"
#include "dm-uevent.h"
@@ -89,6 +90,8 @@ struct multipath {
atomic_t pg_init_in_progress; /* Only one pg_init allowed at once */
atomic_t pg_init_count; /* Number of times pg_init called */
+ unsigned queue_mode;
+
/*
* We must use a mempool of dm_mpath_io structs so that we
* can resubmit bios on error.
@@ -97,10 +100,13 @@ struct multipath {
struct mutex work_mutex;
struct work_struct trigger_event;
+
+ struct work_struct process_queued_bios;
+ struct bio_list queued_bios;
};
/*
- * Context information attached to each bio we process.
+ * Context information attached to each io we process.
*/
struct dm_mpath_io {
struct pgpath *pgpath;
@@ -114,6 +120,7 @@ static struct kmem_cache *_mpio_cache;
static struct workqueue_struct *kmultipathd, *kmpath_handlerd;
static void trigger_event(struct work_struct *work);
static void activate_path(struct work_struct *work);
+static void process_queued_bios(struct work_struct *work);
/*-----------------------------------------------
* Multipath state flags.
@@ -185,7 +192,7 @@ static void free_priority_group(struct priority_group *pg,
kfree(pg);
}
-static struct multipath *alloc_multipath(struct dm_target *ti, bool use_blk_mq)
+static struct multipath *alloc_multipath(struct dm_target *ti)
{
struct multipath *m;
@@ -203,15 +210,7 @@ static struct multipath *alloc_multipath(struct dm_target *ti, bool use_blk_mq)
mutex_init(&m->work_mutex);
m->mpio_pool = NULL;
- if (!use_blk_mq) {
- unsigned min_ios = dm_get_reserved_rq_based_ios();
-
- m->mpio_pool = mempool_create_slab_pool(min_ios, _mpio_cache);
- if (!m->mpio_pool) {
- kfree(m);
- return NULL;
- }
- }
+ m->queue_mode = DM_TYPE_NONE;
m->ti = ti;
ti->private = m;
@@ -220,6 +219,39 @@ static struct multipath *alloc_multipath(struct dm_target *ti, bool use_blk_mq)
return m;
}
+static int alloc_multipath_stage2(struct dm_target *ti, struct multipath *m)
+{
+ if (m->queue_mode == DM_TYPE_NONE) {
+ /*
+ * Default to request-based.
+ */
+ if (dm_use_blk_mq(dm_table_get_md(ti->table)))
+ m->queue_mode = DM_TYPE_MQ_REQUEST_BASED;
+ else
+ m->queue_mode = DM_TYPE_REQUEST_BASED;
+ }
+
+ if (m->queue_mode == DM_TYPE_REQUEST_BASED) {
+ unsigned min_ios = dm_get_reserved_rq_based_ios();
+
+ m->mpio_pool = mempool_create_slab_pool(min_ios, _mpio_cache);
+ if (!m->mpio_pool)
+ return -ENOMEM;
+ }
+ else if (m->queue_mode == DM_TYPE_BIO_BASED) {
+ INIT_WORK(&m->process_queued_bios, process_queued_bios);
+ /*
+ * bio-based doesn't support any direct scsi_dh management;
+ * it just discovers if a scsi_dh is attached.
+ */
+ set_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags);
+ }
+
+ dm_table_set_type(ti->table, m->queue_mode);
+
+ return 0;
+}
+
static void free_multipath(struct multipath *m)
{
struct priority_group *pg, *tmp;
@@ -272,6 +304,41 @@ static void clear_request_fn_mpio(struct multipath *m, union map_info *info)
}
}
+static size_t multipath_per_bio_data_size(void)
+{
+ return sizeof(struct dm_mpath_io) + sizeof(struct dm_bio_details);
+}
+
+static struct dm_mpath_io *get_mpio_from_bio(struct bio *bio)
+{
+ return dm_per_bio_data(bio, multipath_per_bio_data_size());
+}
+
+static struct dm_bio_details *get_bio_details_from_bio(struct bio *bio)
+{
+ /* dm_bio_details is immediately after the dm_mpath_io in bio's per-bio-data */
+ struct dm_mpath_io *mpio = get_mpio_from_bio(bio);
+ void *bio_details = mpio + 1;
+
+ return bio_details;
+}
+
+static void multipath_init_per_bio_data(struct bio *bio, struct dm_mpath_io **mpio_p,
+ struct dm_bio_details **bio_details_p)
+{
+ struct dm_mpath_io *mpio = get_mpio_from_bio(bio);
+ struct dm_bio_details *bio_details = get_bio_details_from_bio(bio);
+
+ memset(mpio, 0, sizeof(*mpio));
+ memset(bio_details, 0, sizeof(*bio_details));
+ dm_bio_record(bio_details, bio);
+
+ if (mpio_p)
+ *mpio_p = mpio;
+ if (bio_details_p)
+ *bio_details_p = bio_details;
+}
+
/*-----------------------------------------------
* Path selection
*-----------------------------------------------*/
@@ -431,16 +498,40 @@ failed:
* and multipath_resume() calls and we have no need to check
* for the DMF_NOFLUSH_SUSPENDING flag.
*/
-static int must_push_back(struct multipath *m)
+static bool __must_push_back(struct multipath *m)
{
- return (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) ||
- ((test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) !=
- test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags)) &&
- dm_noflush_suspending(m->ti)));
+ return ((test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) !=
+ test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags)) &&
+ dm_noflush_suspending(m->ti));
+}
+
+static bool must_push_back_rq(struct multipath *m)
+{
+ bool r;
+ unsigned long flags;
+
+ spin_lock_irqsave(&m->lock, flags);
+ r = (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) ||
+ __must_push_back(m));
+ spin_unlock_irqrestore(&m->lock, flags);
+
+ return r;
+}
+
+static bool must_push_back_bio(struct multipath *m)
+{
+ bool r;
+ unsigned long flags;
+
+ spin_lock_irqsave(&m->lock, flags);
+ r = __must_push_back(m);
+ spin_unlock_irqrestore(&m->lock, flags);
+
+ return r;
}
/*
- * Map cloned requests
+ * Map cloned requests (request-based multipath)
*/
static int __multipath_map(struct dm_target *ti, struct request *clone,
union map_info *map_context,
@@ -459,7 +550,7 @@ static int __multipath_map(struct dm_target *ti, struct request *clone,
pgpath = choose_pgpath(m, nr_bytes);
if (!pgpath) {
- if (!must_push_back(m))
+ if (!must_push_back_rq(m))
r = -EIO; /* Failed */
return r;
} else if (test_bit(MPATHF_QUEUE_IO, &m->flags) ||
@@ -530,6 +621,108 @@ static void multipath_release_clone(struct request *clone)
}
/*
+ * Map cloned bios (bio-based multipath)
+ */
+static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_mpath_io *mpio)
+{
+ size_t nr_bytes = bio->bi_iter.bi_size;
+ struct pgpath *pgpath;
+ unsigned long flags;
+ bool queue_io;
+
+ /* Do we need to select a new pgpath? */
+ pgpath = lockless_dereference(m->current_pgpath);
+ queue_io = test_bit(MPATHF_QUEUE_IO, &m->flags);
+ if (!pgpath || !queue_io)
+ pgpath = choose_pgpath(m, nr_bytes);
+
+ if ((pgpath && queue_io) ||
+ (!pgpath && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))) {
+ /* Queue for the daemon to resubmit */
+ spin_lock_irqsave(&m->lock, flags);
+ bio_list_add(&m->queued_bios, bio);
+ spin_unlock_irqrestore(&m->lock, flags);
+ /* PG_INIT_REQUIRED cannot be set without QUEUE_IO */
+ if (queue_io || test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags))
+ pg_init_all_paths(m);
+ else if (!queue_io)
+ queue_work(kmultipathd, &m->process_queued_bios);
+ return DM_MAPIO_SUBMITTED;
+ }
+
+ if (!pgpath) {
+ if (!must_push_back_bio(m))
+ return -EIO;
+ return DM_MAPIO_REQUEUE;
+ }
+
+ mpio->pgpath = pgpath;
+ mpio->nr_bytes = nr_bytes;
+
+ bio->bi_error = 0;
+ bio->bi_bdev = pgpath->path.dev->bdev;
+ bio->bi_opf |= REQ_FAILFAST_TRANSPORT;
+
+ if (pgpath->pg->ps.type->start_io)
+ pgpath->pg->ps.type->start_io(&pgpath->pg->ps,
+ &pgpath->path,
+ nr_bytes);
+ return DM_MAPIO_REMAPPED;
+}
+
+static int multipath_map_bio(struct dm_target *ti, struct bio *bio)
+{
+ struct multipath *m = ti->private;
+ struct dm_mpath_io *mpio = NULL;
+
+ multipath_init_per_bio_data(bio, &mpio, NULL);
+
+ return __multipath_map_bio(m, bio, mpio);
+}
+
+static void process_queued_bios_list(struct multipath *m)
+{
+ if (m->queue_mode == DM_TYPE_BIO_BASED)
+ queue_work(kmultipathd, &m->process_queued_bios);
+}
+
+static void process_queued_bios(struct work_struct *work)
+{
+ int r;
+ unsigned long flags;
+ struct bio *bio;
+ struct bio_list bios;
+ struct blk_plug plug;
+ struct multipath *m =
+ container_of(work, struct multipath, process_queued_bios);
+
+ bio_list_init(&bios);
+
+ spin_lock_irqsave(&m->lock, flags);
+
+ if (bio_list_empty(&m->queued_bios)) {
+ spin_unlock_irqrestore(&m->lock, flags);
+ return;
+ }
+
+ bio_list_merge(&bios, &m->queued_bios);
+ bio_list_init(&m->queued_bios);
+
+ spin_unlock_irqrestore(&m->lock, flags);
+
+ blk_start_plug(&plug);
+ while ((bio = bio_list_pop(&bios))) {
+ r = __multipath_map_bio(m, bio, get_mpio_from_bio(bio));
+ if (r < 0 || r == DM_MAPIO_REQUEUE) {
+ bio->bi_error = r;
+ bio_endio(bio);
+ } else if (r == DM_MAPIO_REMAPPED)
+ generic_make_request(bio);
+ }
+ blk_finish_plug(&plug);
+}
+
+/*
* If we run out of usable paths, should we queue I/O or error it?
*/
static int queue_if_no_path(struct multipath *m, bool queue_if_no_path,
@@ -557,8 +750,10 @@ static int queue_if_no_path(struct multipath *m, bool queue_if_no_path,
spin_unlock_irqrestore(&m->lock, flags);
- if (!queue_if_no_path)
+ if (!queue_if_no_path) {
dm_table_run_md_queue_async(m->ti->table);
+ process_queued_bios_list(m);
+ }
return 0;
}
@@ -798,6 +993,12 @@ static int parse_hw_handler(struct dm_arg_set *as, struct multipath *m)
if (!hw_argc)
return 0;
+ if (m->queue_mode == DM_TYPE_BIO_BASED) {
+ dm_consume_args(as, hw_argc);
+ DMERR("bio-based multipath doesn't allow hardware handler args");
+ return 0;
+ }
+
m->hw_handler_name = kstrdup(dm_shift_arg(as), GFP_KERNEL);
if (hw_argc > 1) {
@@ -833,7 +1034,7 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m)
const char *arg_name;
static struct dm_arg _args[] = {
- {0, 6, "invalid number of feature args"},
+ {0, 8, "invalid number of feature args"},
{1, 50, "pg_init_retries must be between 1 and 50"},
{0, 60000, "pg_init_delay_msecs must be between 0 and 60000"},
};
@@ -873,6 +1074,24 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m)
continue;
}
+ if (!strcasecmp(arg_name, "queue_mode") &&
+ (argc >= 1)) {
+ const char *queue_mode_name = dm_shift_arg(as);
+
+ if (!strcasecmp(queue_mode_name, "bio"))
+ m->queue_mode = DM_TYPE_BIO_BASED;
+ else if (!strcasecmp(queue_mode_name, "rq"))
+ m->queue_mode = DM_TYPE_REQUEST_BASED;
+ else if (!strcasecmp(queue_mode_name, "mq"))
+ m->queue_mode = DM_TYPE_MQ_REQUEST_BASED;
+ else {
+ ti->error = "Unknown 'queue_mode' requested";
+ r = -EINVAL;
+ }
+ argc--;
+ continue;
+ }
+
ti->error = "Unrecognised multipath feature request";
r = -EINVAL;
} while (argc && !r);
@@ -880,8 +1099,7 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m)
return r;
}
-static int multipath_ctr(struct dm_target *ti, unsigned int argc,
- char **argv)
+static int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv)
{
/* target arguments */
static struct dm_arg _args[] = {
@@ -894,12 +1112,11 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc,
struct dm_arg_set as;
unsigned pg_count = 0;
unsigned next_pg_num;
- bool use_blk_mq = dm_use_blk_mq(dm_table_get_md(ti->table));
as.argc = argc;
as.argv = argv;
- m = alloc_multipath(ti, use_blk_mq);
+ m = alloc_multipath(ti);
if (!m) {
ti->error = "can't allocate multipath";
return -EINVAL;
@@ -909,6 +1126,10 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc,
if (r)
goto bad;
+ r = alloc_multipath_stage2(ti, m);
+ if (r)
+ goto bad;
+
r = parse_hw_handler(&as, m);
if (r)
goto bad;
@@ -958,7 +1179,9 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc,
ti->num_flush_bios = 1;
ti->num_discard_bios = 1;
ti->num_write_same_bios = 1;
- if (use_blk_mq)
+ if (m->queue_mode == DM_TYPE_BIO_BASED)
+ ti->per_io_data_size = multipath_per_bio_data_size();
+ else if (m->queue_mode == DM_TYPE_MQ_REQUEST_BASED)
ti->per_io_data_size = sizeof(struct dm_mpath_io);
return 0;
@@ -1083,8 +1306,10 @@ static int reinstate_path(struct pgpath *pgpath)
out:
spin_unlock_irqrestore(&m->lock, flags);
- if (run_queue)
+ if (run_queue) {
dm_table_run_md_queue_async(m->ti->table);
+ process_queued_bios_list(m);
+ }
return r;
}
@@ -1281,6 +1506,8 @@ static void pg_init_done(void *data, int errors)
}
clear_bit(MPATHF_QUEUE_IO, &m->flags);
+ process_queued_bios_list(m);
+
/*
* Wake up any thread waiting to suspend.
*/
@@ -1328,7 +1555,7 @@ static int do_end_io(struct multipath *m, struct request *clone,
* during end I/O handling, since those clone requests don't have
* bio clones. If we queue them inside the multipath target,
* we need to make bio clones, that requires memory allocation.
- * (See drivers/md/dm.c:end_clone_bio() about why the clone requests
+ * (See drivers/md/dm-rq.c:end_clone_bio() about why the clone requests
* don't have bio clones.)
* Instead of queueing the clone request here, we queue the original
* request into dm core, which will remake a clone request and
@@ -1347,7 +1574,7 @@ static int do_end_io(struct multipath *m, struct request *clone,
if (!atomic_read(&m->nr_valid_paths)) {
if (!test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
- if (!must_push_back(m))
+ if (!must_push_back_rq(m))
r = -EIO;
} else {
if (error == -EBADE)
@@ -1381,6 +1608,64 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone,
return r;
}
+static int do_end_io_bio(struct multipath *m, struct bio *clone,
+ int error, struct dm_mpath_io *mpio)
+{
+ unsigned long flags;
+
+ if (!error)
+ return 0; /* I/O complete */
+
+ if (noretry_error(error))
+ return error;
+
+ if (mpio->pgpath)
+ fail_path(mpio->pgpath);
+
+ if (!atomic_read(&m->nr_valid_paths)) {
+ if (!test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
+ if (!must_push_back_bio(m))
+ return -EIO;
+ return DM_ENDIO_REQUEUE;
+ } else {
+ if (error == -EBADE)
+ return error;
+ }
+ }
+
+ /* Queue for the daemon to resubmit */
+ dm_bio_restore(get_bio_details_from_bio(clone), clone);
+
+ spin_lock_irqsave(&m->lock, flags);
+ bio_list_add(&m->queued_bios, clone);
+ spin_unlock_irqrestore(&m->lock, flags);
+ if (!test_bit(MPATHF_QUEUE_IO, &m->flags))
+ queue_work(kmultipathd, &m->process_queued_bios);
+
+ return DM_ENDIO_INCOMPLETE;
+}
+
+static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone, int error)
+{
+ struct multipath *m = ti->private;
+ struct dm_mpath_io *mpio = get_mpio_from_bio(clone);
+ struct pgpath *pgpath;
+ struct path_selector *ps;
+ int r;
+
+ BUG_ON(!mpio);
+
+ r = do_end_io_bio(m, clone, error, mpio);
+ pgpath = mpio->pgpath;
+ if (pgpath) {
+ ps = &pgpath->pg->ps;
+ if (ps->type->end_io)
+ ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes);
+ }
+
+ return r;
+}
+
/*
* Suspend can't complete until all the I/O is processed so if
* the last path fails we must error any remaining I/O.
@@ -1409,12 +1694,14 @@ static void multipath_postsuspend(struct dm_target *ti)
static void multipath_resume(struct dm_target *ti)
{
struct multipath *m = ti->private;
+ unsigned long flags;
+ spin_lock_irqsave(&m->lock, flags);
if (test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags))
set_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags);
else
clear_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags);
- smp_mb__after_atomic();
+ spin_unlock_irqrestore(&m->lock, flags);
}
/*
@@ -1454,7 +1741,9 @@ static void multipath_status(struct dm_target *ti, status_type_t type,
DMEMIT("%u ", test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) +
(m->pg_init_retries > 0) * 2 +
(m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT) * 2 +
- test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags));
+ test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags) +
+ (m->queue_mode != DM_TYPE_REQUEST_BASED) * 2);
+
if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
DMEMIT("queue_if_no_path ");
if (m->pg_init_retries)
@@ -1463,6 +1752,16 @@ static void multipath_status(struct dm_target *ti, status_type_t type,
DMEMIT("pg_init_delay_msecs %u ", m->pg_init_delay_msecs);
if (test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags))
DMEMIT("retain_attached_hw_handler ");
+ if (m->queue_mode != DM_TYPE_REQUEST_BASED) {
+ switch(m->queue_mode) {
+ case DM_TYPE_BIO_BASED:
+ DMEMIT("queue_mode bio ");
+ break;
+ case DM_TYPE_MQ_REQUEST_BASED:
+ DMEMIT("queue_mode mq ");
+ break;
+ }
+ }
}
if (!m->hw_handler_name || type == STATUSTYPE_INFO)
@@ -1642,6 +1941,7 @@ static int multipath_prepare_ioctl(struct dm_target *ti,
if (test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags))
pg_init_all_paths(m);
dm_table_run_md_queue_async(m->ti->table);
+ process_queued_bios_list(m);
}
/*
@@ -1748,7 +2048,7 @@ static int multipath_busy(struct dm_target *ti)
*---------------------------------------------------------------*/
static struct target_type multipath_target = {
.name = "multipath",
- .version = {1, 11, 0},
+ .version = {1, 12, 0},
.features = DM_TARGET_SINGLETON | DM_TARGET_IMMUTABLE,
.module = THIS_MODULE,
.ctr = multipath_ctr,
@@ -1757,6 +2057,8 @@ static struct target_type multipath_target = {
.clone_and_map_rq = multipath_clone_and_map,
.release_clone_rq = multipath_release_clone,
.rq_end_io = multipath_end_io,
+ .map = multipath_map_bio,
+ .end_io = multipath_end_io_bio,
.presuspend = multipath_presuspend,
.postsuspend = multipath_postsuspend,
.resume = multipath_resume,
@@ -1771,14 +2073,14 @@ static int __init dm_multipath_init(void)
{
int r;
- /* allocate a slab for the dm_ios */
+ /* allocate a slab for the dm_mpath_ios */
_mpio_cache = KMEM_CACHE(dm_mpath_io, 0);
if (!_mpio_cache)
return -ENOMEM;
r = dm_register_target(&multipath_target);
if (r < 0) {
- DMERR("register failed %d", r);
+ DMERR("request-based register failed %d", r);
r = -EINVAL;
goto bad_register_target;
}
@@ -1804,10 +2106,6 @@ static int __init dm_multipath_init(void)
goto bad_alloc_kmpath_handlerd;
}
- DMINFO("version %u.%u.%u loaded",
- multipath_target.version[0], multipath_target.version[1],
- multipath_target.version[2]);
-
return 0;
bad_alloc_kmpath_handlerd: