Diffstat (limited to 'drivers/block')
-rw-r--r--  drivers/block/aoe/aoeblk.c             2
-rw-r--r--  drivers/block/aoe/aoecmd.c            10
-rw-r--r--  drivers/block/aoe/aoedev.c             2
-rw-r--r--  drivers/block/brd.c                   23
-rw-r--r--  drivers/block/drbd/drbd_actlog.c       4
-rw-r--r--  drivers/block/drbd/drbd_bitmap.c      19
-rw-r--r--  drivers/block/drbd/drbd_int.h         12
-rw-r--r--  drivers/block/drbd/drbd_main.c         1
-rw-r--r--  drivers/block/drbd/drbd_nl.c           4
-rw-r--r--  drivers/block/drbd/drbd_req.c         47
-rw-r--r--  drivers/block/drbd/drbd_worker.c      44
-rw-r--r--  drivers/block/floppy.c                 7
-rw-r--r--  drivers/block/loop.c                  15
-rw-r--r--  drivers/block/nbd.c                  388
-rw-r--r--  drivers/block/null_blk.c              40
-rw-r--r--  drivers/block/nvme-core.c            339
-rw-r--r--  drivers/block/pktcdvd.c               59
-rw-r--r--  drivers/block/ps3vram.c                5
-rw-r--r--  drivers/block/rbd.c                  136
-rw-r--r--  drivers/block/rsxx/dev.c              11
-rw-r--r--  drivers/block/skd_main.c               2
-rw-r--r--  drivers/block/umem.c                   6
-rw-r--r--  drivers/block/virtio_blk.c             8
-rw-r--r--  drivers/block/xen-blkback/blkback.c    4
-rw-r--r--  drivers/block/xen-blkfront.c         149
-rw-r--r--  drivers/block/zram/zram_drv.c         39
-rw-r--r--  drivers/block/zram/zram_drv.h          1
27 files changed, 811 insertions(+), 566 deletions(-)
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index 46c282fff..dd73e1ff1 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -395,7 +395,7 @@ aoeblk_gdalloc(void *vp)
WARN_ON(d->flags & DEVFL_TKILL);
WARN_ON(d->gd);
WARN_ON(d->flags & DEVFL_UP);
- blk_queue_max_hw_sectors(q, 1024);
+ blk_queue_max_hw_sectors(q, BLK_DEF_MAX_SECTORS);
q->backing_dev_info.name = "aoe";
q->backing_dev_info.ra_pages = READ_AHEAD / PAGE_CACHE_SIZE;
d->bufpool = mp;
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 422b7d84f..ad80c85e0 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -1110,7 +1110,7 @@ aoe_end_request(struct aoedev *d, struct request *rq, int fastfail)
d->ip.rq = NULL;
do {
bio = rq->bio;
- bok = !fastfail && test_bit(BIO_UPTODATE, &bio->bi_flags);
+ bok = !fastfail && !bio->bi_error;
} while (__blk_end_request(rq, bok ? 0 : -EIO, bio->bi_iter.bi_size));
/* cf. http://lkml.org/lkml/2006/10/31/28 */
@@ -1172,7 +1172,7 @@ ktiocomplete(struct frame *f)
ahout->cmdstat, ahin->cmdstat,
d->aoemajor, d->aoeminor);
noskb: if (buf)
- clear_bit(BIO_UPTODATE, &buf->bio->bi_flags);
+ buf->bio->bi_error = -EIO;
goto out;
}
@@ -1185,7 +1185,7 @@ noskb: if (buf)
"aoe: runt data size in read from",
(long) d->aoemajor, d->aoeminor,
skb->len, n);
- clear_bit(BIO_UPTODATE, &buf->bio->bi_flags);
+ buf->bio->bi_error = -EIO;
break;
}
if (n > f->iter.bi_size) {
@@ -1193,7 +1193,7 @@ noskb: if (buf)
"aoe: too-large data size in read from",
(long) d->aoemajor, d->aoeminor,
n, f->iter.bi_size);
- clear_bit(BIO_UPTODATE, &buf->bio->bi_flags);
+ buf->bio->bi_error = -EIO;
break;
}
bvcpy(skb, f->buf->bio, f->iter, n);
@@ -1695,7 +1695,7 @@ aoe_failbuf(struct aoedev *d, struct buf *buf)
if (buf == NULL)
return;
buf->iter.bi_size = 0;
- clear_bit(BIO_UPTODATE, &buf->bio->bi_flags);
+ buf->bio->bi_error = -EIO;
if (buf->nframesout == 0)
aoe_end_buf(d, buf);
}
diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c
index e774c50b6..ffd194750 100644
--- a/drivers/block/aoe/aoedev.c
+++ b/drivers/block/aoe/aoedev.c
@@ -170,7 +170,7 @@ aoe_failip(struct aoedev *d)
if (rq == NULL)
return;
while ((bio = d->ip.nxbio)) {
- clear_bit(BIO_UPTODATE, &bio->bi_flags);
+ bio->bi_error = -EIO;
d->ip.nxbio = bio->bi_next;
n = (unsigned long) rq->special;
rq->special = (void *) --n;
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 64ab4951e..b9794aeeb 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -331,14 +331,12 @@ static void brd_make_request(struct request_queue *q, struct bio *bio)
struct bio_vec bvec;
sector_t sector;
struct bvec_iter iter;
- int err = -EIO;
sector = bio->bi_iter.bi_sector;
if (bio_end_sector(bio) > get_capacity(bdev->bd_disk))
- goto out;
+ goto io_error;
if (unlikely(bio->bi_rw & REQ_DISCARD)) {
- err = 0;
discard_from_brd(brd, sector, bio->bi_iter.bi_size);
goto out;
}
@@ -349,15 +347,20 @@ static void brd_make_request(struct request_queue *q, struct bio *bio)
bio_for_each_segment(bvec, bio, iter) {
unsigned int len = bvec.bv_len;
+ int err;
+
err = brd_do_bvec(brd, bvec.bv_page, len,
bvec.bv_offset, rw, sector);
if (err)
- break;
+ goto io_error;
sector += len >> SECTOR_SHIFT;
}
out:
- bio_endio(bio, err);
+ bio_endio(bio);
+ return;
+io_error:
+ bio_io_error(bio);
}
static int brd_rw_page(struct block_device *bdev, sector_t sector,
@@ -371,7 +374,7 @@ static int brd_rw_page(struct block_device *bdev, sector_t sector,
#ifdef CONFIG_BLK_DEV_RAM_DAX
static long brd_direct_access(struct block_device *bdev, sector_t sector,
- void **kaddr, unsigned long *pfn, long size)
+ void __pmem **kaddr, unsigned long *pfn)
{
struct brd_device *brd = bdev->bd_disk->private_data;
struct page *page;
@@ -381,13 +384,9 @@ static long brd_direct_access(struct block_device *bdev, sector_t sector,
page = brd_insert_page(brd, sector);
if (!page)
return -ENOSPC;
- *kaddr = page_address(page);
+ *kaddr = (void __pmem *)page_address(page);
*pfn = page_to_pfn(page);
- /*
- * TODO: If size > PAGE_SIZE, we could look to see if the next page in
- * the file happens to be mapped to the next page of physical RAM.
- */
return PAGE_SIZE;
}
#else
@@ -500,7 +499,7 @@ static struct brd_device *brd_alloc(int i)
blk_queue_physical_block_size(brd->brd_queue, PAGE_SIZE);
brd->brd_queue->limits.discard_granularity = PAGE_SIZE;
- brd->brd_queue->limits.max_discard_sectors = UINT_MAX;
+ blk_queue_max_discard_sectors(brd->brd_queue, UINT_MAX);
brd->brd_queue->limits.discard_zeroes_data = 1;
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, brd->brd_queue);
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 1318e3217..b3868e7a1 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -175,11 +175,11 @@ static int _drbd_md_sync_page_io(struct drbd_device *device,
atomic_inc(&device->md_io.in_use); /* drbd_md_put_buffer() is in the completion handler */
device->md_io.submit_jif = jiffies;
if (drbd_insert_fault(device, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD))
- bio_endio(bio, -EIO);
+ bio_io_error(bio);
else
submit_bio(rw, bio);
wait_until_done_or_force_detached(device, bdev, &device->md_io.done);
- if (bio_flagged(bio, BIO_UPTODATE))
+ if (!bio->bi_error)
err = device->md_io.error;
out:
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index 434c77dcc..e5e0f19ce 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -941,36 +941,27 @@ static void drbd_bm_aio_ctx_destroy(struct kref *kref)
}
/* bv_page may be a copy, or may be the original */
-static void drbd_bm_endio(struct bio *bio, int error)
+static void drbd_bm_endio(struct bio *bio)
{
struct drbd_bm_aio_ctx *ctx = bio->bi_private;
struct drbd_device *device = ctx->device;
struct drbd_bitmap *b = device->bitmap;
unsigned int idx = bm_page_to_idx(bio->bi_io_vec[0].bv_page);
- int uptodate = bio_flagged(bio, BIO_UPTODATE);
-
-
- /* strange behavior of some lower level drivers...
- * fail the request by clearing the uptodate flag,
- * but do not return any error?!
- * do we want to WARN() on this? */
- if (!error && !uptodate)
- error = -EIO;
if ((ctx->flags & BM_AIO_COPY_PAGES) == 0 &&
!bm_test_page_unchanged(b->bm_pages[idx]))
drbd_warn(device, "bitmap page idx %u changed during IO!\n", idx);
- if (error) {
+ if (bio->bi_error) {
/* ctx error will hold the completed-last non-zero error code,
* in case error codes differ. */
- ctx->error = error;
+ ctx->error = bio->bi_error;
bm_set_page_io_err(b->bm_pages[idx]);
/* Not identical to on disk version of it.
* Is BM_PAGE_IO_ERROR enough? */
if (__ratelimit(&drbd_ratelimit_state))
drbd_err(device, "IO ERROR %d on bitmap page idx %u\n",
- error, idx);
+ bio->bi_error, idx);
} else {
bm_clear_page_io_err(b->bm_pages[idx]);
dynamic_drbd_dbg(device, "bitmap page idx %u completed\n", idx);
@@ -1031,7 +1022,7 @@ static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_ho
if (drbd_insert_fault(device, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) {
bio->bi_rw |= rw;
- bio_endio(bio, -EIO);
+ bio_io_error(bio);
} else {
submit_bio(rw, bio);
/* this should not count as user activity and cause the
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index efd19c2da..015c6e91b 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1450,7 +1450,6 @@ extern void do_submit(struct work_struct *ws);
extern void __drbd_make_request(struct drbd_device *, struct bio *, unsigned long);
extern void drbd_make_request(struct request_queue *q, struct bio *bio);
extern int drbd_read_remote(struct drbd_device *device, struct drbd_request *req);
-extern int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec);
extern int is_valid_ar_handle(struct drbd_request *, sector_t);
@@ -1481,9 +1480,9 @@ extern int drbd_khelper(struct drbd_device *device, char *cmd);
/* drbd_worker.c */
/* bi_end_io handlers */
-extern void drbd_md_endio(struct bio *bio, int error);
-extern void drbd_peer_request_endio(struct bio *bio, int error);
-extern void drbd_request_endio(struct bio *bio, int error);
+extern void drbd_md_endio(struct bio *bio);
+extern void drbd_peer_request_endio(struct bio *bio);
+extern void drbd_request_endio(struct bio *bio);
extern int drbd_worker(struct drbd_thread *thi);
enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_minor);
void drbd_resync_after_changed(struct drbd_device *device);
@@ -1604,12 +1603,13 @@ static inline void drbd_generic_make_request(struct drbd_device *device,
__release(local);
if (!bio->bi_bdev) {
drbd_err(device, "drbd_generic_make_request: bio->bi_bdev == NULL\n");
- bio_endio(bio, -ENODEV);
+ bio->bi_error = -ENODEV;
+ bio_endio(bio);
return;
}
if (drbd_insert_fault(device, fault_type))
- bio_endio(bio, -EIO);
+ bio_io_error(bio);
else
generic_make_request(bio);
}
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index a1518539b..74d97f4ba 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2774,7 +2774,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
This triggers a max_bio_size message upon first attach or connect */
blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8);
blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
- blk_queue_merge_bvec(q, drbd_merge_bvec);
q->queue_lock = &resource->req_lock;
device->md_io.page = alloc_page(GFP_KERNEL);
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 74df8cfad..e80cbefbc 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1156,14 +1156,14 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi
/* For now, don't allow more than one activity log extent worth of data
* to be discarded in one go. We may need to rework drbd_al_begin_io()
* to allow for even larger discard ranges */
- q->limits.max_discard_sectors = DRBD_MAX_DISCARD_SECTORS;
+ blk_queue_max_discard_sectors(q, DRBD_MAX_DISCARD_SECTORS);
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
/* REALLY? Is stacking secdiscard "legal"? */
if (blk_queue_secdiscard(b))
queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, q);
} else {
- q->limits.max_discard_sectors = 0;
+ blk_queue_max_discard_sectors(q, 0);
queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
queue_flag_clear_unlocked(QUEUE_FLAG_SECDISCARD, q);
}
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 3907202fb..211592682 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -201,7 +201,8 @@ void start_new_tl_epoch(struct drbd_connection *connection)
void complete_master_bio(struct drbd_device *device,
struct bio_and_error *m)
{
- bio_endio(m->bio, m->error);
+ m->bio->bi_error = m->error;
+ bio_endio(m->bio);
dec_ap_bio(device);
}
@@ -1153,12 +1154,12 @@ drbd_submit_req_private_bio(struct drbd_request *req)
rw == WRITE ? DRBD_FAULT_DT_WR
: rw == READ ? DRBD_FAULT_DT_RD
: DRBD_FAULT_DT_RA))
- bio_endio(bio, -EIO);
+ bio_io_error(bio);
else
generic_make_request(bio);
put_ldev(device);
} else
- bio_endio(bio, -EIO);
+ bio_io_error(bio);
}
static void drbd_queue_write(struct drbd_device *device, struct drbd_request *req)
@@ -1191,7 +1192,8 @@ drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long
/* only pass the error to the upper layers.
* if user cannot handle io errors, that's not our business. */
drbd_err(device, "could not kmalloc() req\n");
- bio_endio(bio, -ENOMEM);
+ bio->bi_error = -ENOMEM;
+ bio_endio(bio);
return ERR_PTR(-ENOMEM);
}
req->start_jif = start_jif;
@@ -1497,6 +1499,8 @@ void drbd_make_request(struct request_queue *q, struct bio *bio)
struct drbd_device *device = (struct drbd_device *) q->queuedata;
unsigned long start_jif;
+ blk_queue_split(q, &bio, q->bio_split);
+
start_jif = jiffies;
/*
@@ -1508,41 +1512,6 @@ void drbd_make_request(struct request_queue *q, struct bio *bio)
__drbd_make_request(device, bio, start_jif);
}
-/* This is called by bio_add_page().
- *
- * q->max_hw_sectors and other global limits are already enforced there.
- *
- * We need to call down to our lower level device,
- * in case it has special restrictions.
- *
- * We also may need to enforce configured max-bio-bvecs limits.
- *
- * As long as the BIO is empty we have to allow at least one bvec,
- * regardless of size and offset, so no need to ask lower levels.
- */
-int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec)
-{
- struct drbd_device *device = (struct drbd_device *) q->queuedata;
- unsigned int bio_size = bvm->bi_size;
- int limit = DRBD_MAX_BIO_SIZE;
- int backing_limit;
-
- if (bio_size && get_ldev(device)) {
- unsigned int max_hw_sectors = queue_max_hw_sectors(q);
- struct request_queue * const b =
- device->ldev->backing_bdev->bd_disk->queue;
- if (b->merge_bvec_fn) {
- bvm->bi_bdev = device->ldev->backing_bdev;
- backing_limit = b->merge_bvec_fn(b, bvm, bvec);
- limit = min(limit, backing_limit);
- }
- put_ldev(device);
- if ((limit >> 9) > max_hw_sectors)
- limit = max_hw_sectors << 9;
- }
- return limit;
-}
-
void request_timer_fn(unsigned long data)
{
struct drbd_device *device = (struct drbd_device *) data;
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index d0fae55d8..5578c1477 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -65,12 +65,12 @@ rwlock_t global_state_lock;
/* used for synchronous meta data and bitmap IO
* submitted by drbd_md_sync_page_io()
*/
-void drbd_md_endio(struct bio *bio, int error)
+void drbd_md_endio(struct bio *bio)
{
struct drbd_device *device;
device = bio->bi_private;
- device->md_io.error = error;
+ device->md_io.error = bio->bi_error;
/* We grabbed an extra reference in _drbd_md_sync_page_io() to be able
* to timeout on the lower level device, and eventually detach from it.
@@ -170,31 +170,20 @@ void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(l
/* writes on behalf of the partner, or resync writes,
* "submitted" by the receiver.
*/
-void drbd_peer_request_endio(struct bio *bio, int error)
+void drbd_peer_request_endio(struct bio *bio)
{
struct drbd_peer_request *peer_req = bio->bi_private;
struct drbd_device *device = peer_req->peer_device->device;
- int uptodate = bio_flagged(bio, BIO_UPTODATE);
int is_write = bio_data_dir(bio) == WRITE;
int is_discard = !!(bio->bi_rw & REQ_DISCARD);
- if (error && __ratelimit(&drbd_ratelimit_state))
+ if (bio->bi_error && __ratelimit(&drbd_ratelimit_state))
drbd_warn(device, "%s: error=%d s=%llus\n",
is_write ? (is_discard ? "discard" : "write")
- : "read", error,
+ : "read", bio->bi_error,
(unsigned long long)peer_req->i.sector);
- if (!error && !uptodate) {
- if (__ratelimit(&drbd_ratelimit_state))
- drbd_warn(device, "%s: setting error to -EIO s=%llus\n",
- is_write ? "write" : "read",
- (unsigned long long)peer_req->i.sector);
- /* strange behavior of some lower level drivers...
- * fail the request by clearing the uptodate flag,
- * but do not return any error?! */
- error = -EIO;
- }
- if (error)
+ if (bio->bi_error)
set_bit(__EE_WAS_ERROR, &peer_req->flags);
bio_put(bio); /* no need for the bio anymore */
@@ -208,24 +197,13 @@ void drbd_peer_request_endio(struct bio *bio, int error)
/* read, readA or write requests on R_PRIMARY coming from drbd_make_request
*/
-void drbd_request_endio(struct bio *bio, int error)
+void drbd_request_endio(struct bio *bio)
{
unsigned long flags;
struct drbd_request *req = bio->bi_private;
struct drbd_device *device = req->device;
struct bio_and_error m;
enum drbd_req_event what;
- int uptodate = bio_flagged(bio, BIO_UPTODATE);
-
- if (!error && !uptodate) {
- drbd_warn(device, "p %s: setting error to -EIO\n",
- bio_data_dir(bio) == WRITE ? "write" : "read");
- /* strange behavior of some lower level drivers...
- * fail the request by clearing the uptodate flag,
- * but do not return any error?! */
- error = -EIO;
- }
-
/* If this request was aborted locally before,
* but now was completed "successfully",
@@ -259,14 +237,14 @@ void drbd_request_endio(struct bio *bio, int error)
if (__ratelimit(&drbd_ratelimit_state))
drbd_emerg(device, "delayed completion of aborted local request; disk-timeout may be too aggressive\n");
- if (!error)
+ if (!bio->bi_error)
panic("possible random memory corruption caused by delayed completion of aborted local request\n");
}
/* to avoid recursion in __req_mod */
- if (unlikely(error)) {
+ if (unlikely(bio->bi_error)) {
if (bio->bi_rw & REQ_DISCARD)
- what = (error == -EOPNOTSUPP)
+ what = (bio->bi_error == -EOPNOTSUPP)
? DISCARD_COMPLETED_NOTSUPP
: DISCARD_COMPLETED_WITH_ERROR;
else
@@ -279,7 +257,7 @@ void drbd_request_endio(struct bio *bio, int error)
what = COMPLETED_OK;
bio_put(req->private_bio);
- req->private_bio = ERR_PTR(error);
+ req->private_bio = ERR_PTR(bio->bi_error);
/* not req_mod(), we need irqsave here! */
spin_lock_irqsave(&device->resource->req_lock, flags);
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index a08cda955..331363e7d 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -3771,13 +3771,14 @@ struct rb0_cbdata {
struct completion complete;
};
-static void floppy_rb0_cb(struct bio *bio, int err)
+static void floppy_rb0_cb(struct bio *bio)
{
struct rb0_cbdata *cbdata = (struct rb0_cbdata *)bio->bi_private;
int drive = cbdata->drive;
- if (err) {
- pr_info("floppy: error %d while reading block 0\n", err);
+ if (bio->bi_error) {
+ pr_info("floppy: error %d while reading block 0\n",
+ bio->bi_error);
set_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags);
}
complete(&cbdata->complete);
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 23103ad14..291ec9e39 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -693,7 +693,7 @@ static void loop_config_discard(struct loop_device *lo)
lo->lo_encrypt_key_size) {
q->limits.discard_granularity = 0;
q->limits.discard_alignment = 0;
- q->limits.max_discard_sectors = 0;
+ blk_queue_max_discard_sectors(q, 0);
q->limits.discard_zeroes_data = 0;
queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
return;
@@ -701,7 +701,7 @@ static void loop_config_discard(struct loop_device *lo)
q->limits.discard_granularity = inode->i_sb->s_blocksize;
q->limits.discard_alignment = 0;
- q->limits.max_discard_sectors = UINT_MAX >> 9;
+ blk_queue_max_discard_sectors(q, UINT_MAX >> 9);
q->limits.discard_zeroes_data = 1;
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
}
@@ -1504,17 +1504,16 @@ static void loop_handle_cmd(struct loop_cmd *cmd)
{
const bool write = cmd->rq->cmd_flags & REQ_WRITE;
struct loop_device *lo = cmd->rq->q->queuedata;
- int ret = -EIO;
+ int ret = 0;
- if (write && (lo->lo_flags & LO_FLAGS_READ_ONLY))
+ if (write && (lo->lo_flags & LO_FLAGS_READ_ONLY)) {
+ ret = -EIO;
goto failed;
+ }
ret = do_req_filebacked(lo, cmd->rq);
-
failed:
- if (ret)
- cmd->rq->errors = -EIO;
- blk_mq_complete_request(cmd->rq);
+ blk_mq_complete_request(cmd->rq, ret ? -EIO : 0);
}
static void loop_queue_write_work(struct work_struct *work)
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 0e385d8e9..1b8762338 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -33,6 +33,7 @@
#include <linux/net.h>
#include <linux/kthread.h>
#include <linux/types.h>
+#include <linux/debugfs.h>
#include <asm/uaccess.h>
#include <asm/types.h>
@@ -40,8 +41,7 @@
#include <linux/nbd.h>
struct nbd_device {
- int flags;
- int harderror; /* Code of hard error */
+ u32 flags;
struct socket * sock; /* If == NULL, device is not ready, yet */
int magic;
@@ -56,11 +56,25 @@ struct nbd_device {
struct gendisk *disk;
int blksize;
loff_t bytesize;
- pid_t pid; /* pid of nbd-client, if attached */
int xmit_timeout;
- int disconnect; /* a disconnect has been requested by user */
+ bool disconnect; /* a disconnect has been requested by user */
+
+ struct timer_list timeout_timer;
+ spinlock_t tasks_lock;
+ struct task_struct *task_recv;
+ struct task_struct *task_send;
+
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+ struct dentry *dbg_dir;
+#endif
};
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+static struct dentry *nbd_dbg_dir;
+#endif
+
+#define nbd_name(nbd) ((nbd)->disk->disk_name)
+
#define NBD_MAGIC 0x68797548
static unsigned int nbds_max = 16;
@@ -113,26 +127,38 @@ static void nbd_end_request(struct nbd_device *nbd, struct request *req)
/*
* Forcibly shutdown the socket causing all listeners to error
*/
-static void sock_shutdown(struct nbd_device *nbd, int lock)
+static void sock_shutdown(struct nbd_device *nbd)
{
- if (lock)
- mutex_lock(&nbd->tx_lock);
- if (nbd->sock) {
- dev_warn(disk_to_dev(nbd->disk), "shutting down socket\n");
- kernel_sock_shutdown(nbd->sock, SHUT_RDWR);
- nbd->sock = NULL;
- }
- if (lock)
- mutex_unlock(&nbd->tx_lock);
+ if (!nbd->sock)
+ return;
+
+ dev_warn(disk_to_dev(nbd->disk), "shutting down socket\n");
+ kernel_sock_shutdown(nbd->sock, SHUT_RDWR);
+ nbd->sock = NULL;
+ del_timer_sync(&nbd->timeout_timer);
}
static void nbd_xmit_timeout(unsigned long arg)
{
- struct task_struct *task = (struct task_struct *)arg;
+ struct nbd_device *nbd = (struct nbd_device *)arg;
+ unsigned long flags;
+
+ if (list_empty(&nbd->queue_head))
+ return;
+
+ nbd->disconnect = true;
+
+ spin_lock_irqsave(&nbd->tasks_lock, flags);
- printk(KERN_WARNING "nbd: killing hung xmit (%s, pid: %d)\n",
- task->comm, task->pid);
- force_sig(SIGKILL, task);
+ if (nbd->task_recv)
+ force_sig(SIGKILL, nbd->task_recv);
+
+ if (nbd->task_send)
+ force_sig(SIGKILL, nbd->task_send);
+
+ spin_unlock_irqrestore(&nbd->tasks_lock, flags);
+
+ dev_err(nbd_to_dev(nbd), "Connection timed out, killed receiver and sender, shutting down connection\n");
}
/*
@@ -171,33 +197,12 @@ static int sock_xmit(struct nbd_device *nbd, int send, void *buf, int size,
msg.msg_controllen = 0;
msg.msg_flags = msg_flags | MSG_NOSIGNAL;
- if (send) {
- struct timer_list ti;
-
- if (nbd->xmit_timeout) {
- init_timer(&ti);
- ti.function = nbd_xmit_timeout;
- ti.data = (unsigned long)current;
- ti.expires = jiffies + nbd->xmit_timeout;
- add_timer(&ti);
- }
+ if (send)
result = kernel_sendmsg(sock, &msg, &iov, 1, size);
- if (nbd->xmit_timeout)
- del_timer_sync(&ti);
- } else
+ else
result = kernel_recvmsg(sock, &msg, &iov, 1, size,
msg.msg_flags);
- if (signal_pending(current)) {
- siginfo_t info;
- printk(KERN_WARNING "nbd (pid %d: %s) got signal %d\n",
- task_pid_nr(current), current->comm,
- dequeue_signal_lock(current, &current->blocked, &info));
- result = -EINTR;
- sock_shutdown(nbd, !send);
- break;
- }
-
if (result <= 0) {
if (result == 0)
result = -EPIPE; /* short read */
@@ -210,6 +215,9 @@ static int sock_xmit(struct nbd_device *nbd, int send, void *buf, int size,
sigprocmask(SIG_SETMASK, &oldset, NULL);
tsk_restore_flags(current, pflags, PF_MEMALLOC);
+ if (!send && nbd->xmit_timeout)
+ mod_timer(&nbd->timeout_timer, jiffies + nbd->xmit_timeout);
+
return result;
}
@@ -333,26 +341,24 @@ static struct request *nbd_read_stat(struct nbd_device *nbd)
if (result <= 0) {
dev_err(disk_to_dev(nbd->disk),
"Receive control failed (result %d)\n", result);
- goto harderror;
+ return ERR_PTR(result);
}
if (ntohl(reply.magic) != NBD_REPLY_MAGIC) {
dev_err(disk_to_dev(nbd->disk), "Wrong magic (0x%lx)\n",
(unsigned long)ntohl(reply.magic));
- result = -EPROTO;
- goto harderror;
+ return ERR_PTR(-EPROTO);
}
req = nbd_find_request(nbd, *(struct request **)reply.handle);
if (IS_ERR(req)) {
result = PTR_ERR(req);
if (result != -ENOENT)
- goto harderror;
+ return ERR_PTR(result);
dev_err(disk_to_dev(nbd->disk), "Unexpected reply (%p)\n",
reply.handle);
- result = -EBADR;
- goto harderror;
+ return ERR_PTR(-EBADR);
}
if (ntohl(reply.error)) {
@@ -380,18 +386,15 @@ static struct request *nbd_read_stat(struct nbd_device *nbd)
}
}
return req;
-harderror:
- nbd->harderror = result;
- return NULL;
}
static ssize_t pid_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct gendisk *disk = dev_to_disk(dev);
+ struct nbd_device *nbd = (struct nbd_device *)disk->private_data;
- return sprintf(buf, "%ld\n",
- (long) ((struct nbd_device *)disk->private_data)->pid);
+ return sprintf(buf, "%d\n", task_pid_nr(nbd->task_recv));
}
static struct device_attribute pid_attr = {
@@ -399,28 +402,60 @@ static struct device_attribute pid_attr = {
.show = pid_show,
};
-static int nbd_do_it(struct nbd_device *nbd)
+static int nbd_thread_recv(struct nbd_device *nbd)
{
struct request *req;
int ret;
+ unsigned long flags;
BUG_ON(nbd->magic != NBD_MAGIC);
sk_set_memalloc(nbd->sock->sk);
- nbd->pid = task_pid_nr(current);
+
+ spin_lock_irqsave(&nbd->tasks_lock, flags);
+ nbd->task_recv = current;
+ spin_unlock_irqrestore(&nbd->tasks_lock, flags);
+
ret = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
if (ret) {
dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
- nbd->pid = 0;
+
+ spin_lock_irqsave(&nbd->tasks_lock, flags);
+ nbd->task_recv = NULL;
+ spin_unlock_irqrestore(&nbd->tasks_lock, flags);
+
return ret;
}
- while ((req = nbd_read_stat(nbd)) != NULL)
+ while (1) {
+ req = nbd_read_stat(nbd);
+ if (IS_ERR(req)) {
+ ret = PTR_ERR(req);
+ break;
+ }
+
nbd_end_request(nbd, req);
+ }
device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
- nbd->pid = 0;
- return 0;
+
+ spin_lock_irqsave(&nbd->tasks_lock, flags);
+ nbd->task_recv = NULL;
+ spin_unlock_irqrestore(&nbd->tasks_lock, flags);
+
+ if (signal_pending(current)) {
+ siginfo_t info;
+
+ ret = dequeue_signal_lock(current, &current->blocked, &info);
+ dev_warn(nbd_to_dev(nbd), "pid %d, %s, got signal %d\n",
+ task_pid_nr(current), current->comm, ret);
+ mutex_lock(&nbd->tx_lock);
+ sock_shutdown(nbd);
+ mutex_unlock(&nbd->tx_lock);
+ ret = -ETIMEDOUT;
+ }
+
+ return ret;
}
static void nbd_clear_que(struct nbd_device *nbd)
@@ -455,6 +490,7 @@ static void nbd_clear_que(struct nbd_device *nbd)
req->errors++;
nbd_end_request(nbd, req);
}
+ dev_dbg(disk_to_dev(nbd->disk), "queue cleared\n");
}
@@ -482,6 +518,9 @@ static void nbd_handle_req(struct nbd_device *nbd, struct request *req)
nbd->active_req = req;
+ if (nbd->xmit_timeout && list_empty_careful(&nbd->queue_head))
+ mod_timer(&nbd->timeout_timer, jiffies + nbd->xmit_timeout);
+
if (nbd_send_req(nbd, req) != 0) {
dev_err(disk_to_dev(nbd->disk), "Request send failed\n");
req->errors++;
@@ -503,10 +542,15 @@ error_out:
nbd_end_request(nbd, req);
}
-static int nbd_thread(void *data)
+static int nbd_thread_send(void *data)
{
struct nbd_device *nbd = data;
struct request *req;
+ unsigned long flags;
+
+ spin_lock_irqsave(&nbd->tasks_lock, flags);
+ nbd->task_send = current;
+ spin_unlock_irqrestore(&nbd->tasks_lock, flags);
set_user_nice(current, MIN_NICE);
while (!kthread_should_stop() || !list_empty(&nbd->waiting_queue)) {
@@ -515,6 +559,20 @@ static int nbd_thread(void *data)
kthread_should_stop() ||
!list_empty(&nbd->waiting_queue));
+ if (signal_pending(current)) {
+ siginfo_t info;
+ int ret;
+
+ ret = dequeue_signal_lock(current, &current->blocked,
+ &info);
+ dev_warn(nbd_to_dev(nbd), "pid %d, %s, got signal %d\n",
+ task_pid_nr(current), current->comm, ret);
+ mutex_lock(&nbd->tx_lock);
+ sock_shutdown(nbd);
+ mutex_unlock(&nbd->tx_lock);
+ break;
+ }
+
/* extract request */
if (list_empty(&nbd->waiting_queue))
continue;
@@ -528,6 +586,17 @@ static int nbd_thread(void *data)
/* handle request */
nbd_handle_req(nbd, req);
}
+
+ spin_lock_irqsave(&nbd->tasks_lock, flags);
+ nbd->task_send = NULL;
+ spin_unlock_irqrestore(&nbd->tasks_lock, flags);
+
+ /* Clear maybe pending signals */
+ if (signal_pending(current)) {
+ siginfo_t info;
+ dequeue_signal_lock(current, &current->blocked, &info);
+ }
+
return 0;
}
@@ -538,7 +607,7 @@ static int nbd_thread(void *data)
* { printk( "Warning: Ignoring result!\n"); nbd_end_request( req ); }
*/
-static void do_nbd_request(struct request_queue *q)
+static void nbd_request_handler(struct request_queue *q)
__releases(q->queue_lock) __acquires(q->queue_lock)
{
struct request *req;
@@ -574,6 +643,9 @@ static void do_nbd_request(struct request_queue *q)
}
}
+static int nbd_dev_dbg_init(struct nbd_device *nbd);
+static void nbd_dev_dbg_close(struct nbd_device *nbd);
+
/* Must be called with tx_lock held */
static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
@@ -597,7 +669,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
if (!nbd->sock)
return -EINVAL;
- nbd->disconnect = 1;
+ nbd->disconnect = true;
nbd_send_req(nbd, &sreq);
return 0;
@@ -625,7 +697,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
nbd->sock = sock;
if (max_part > 0)
bdev->bd_invalidated = 1;
- nbd->disconnect = 0; /* we're connected now */
+ nbd->disconnect = false; /* we're connected now */
return 0;
}
return -EINVAL;
@@ -648,6 +720,12 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
case NBD_SET_TIMEOUT:
nbd->xmit_timeout = arg * HZ;
+ if (arg)
+ mod_timer(&nbd->timeout_timer,
+ jiffies + nbd->xmit_timeout);
+ else
+ del_timer_sync(&nbd->timeout_timer);
+
return 0;
case NBD_SET_FLAGS:
@@ -666,7 +744,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
struct socket *sock;
int error;
- if (nbd->pid)
+ if (nbd->task_recv)
return -EBUSY;
if (!nbd->sock)
return -EINVAL;
@@ -683,24 +761,24 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
else
blk_queue_flush(nbd->disk->queue, 0);
- thread = kthread_run(nbd_thread, nbd, "%s",
- nbd->disk->disk_name);
+ thread = kthread_run(nbd_thread_send, nbd, "%s",
+ nbd_name(nbd));
if (IS_ERR(thread)) {
mutex_lock(&nbd->tx_lock);
return PTR_ERR(thread);
}
- error = nbd_do_it(nbd);
+ nbd_dev_dbg_init(nbd);
+ error = nbd_thread_recv(nbd);
+ nbd_dev_dbg_close(nbd);
kthread_stop(thread);
mutex_lock(&nbd->tx_lock);
- if (error)
- return error;
- sock_shutdown(nbd, 0);
+
+ sock_shutdown(nbd);
sock = nbd->sock;
nbd->sock = NULL;
nbd_clear_que(nbd);
- dev_warn(disk_to_dev(nbd->disk), "queue cleared\n");
kill_bdev(bdev);
queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
set_device_ro(bdev, false);
@@ -714,7 +792,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
blkdev_reread_part(bdev);
if (nbd->disconnect) /* user requested, ignore socket errors */
return 0;
- return nbd->harderror;
+ return error;
}
case NBD_CLEAR_QUE:
@@ -758,6 +836,161 @@ static const struct block_device_operations nbd_fops =
.ioctl = nbd_ioctl,
};
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+
+static int nbd_dbg_tasks_show(struct seq_file *s, void *unused)
+{
+ struct nbd_device *nbd = s->private;
+
+ if (nbd->task_recv)
+ seq_printf(s, "recv: %d\n", task_pid_nr(nbd->task_recv));
+ if (nbd->task_send)
+ seq_printf(s, "send: %d\n", task_pid_nr(nbd->task_send));
+
+ return 0;
+}
+
+static int nbd_dbg_tasks_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, nbd_dbg_tasks_show, inode->i_private);
+}
+
+static const struct file_operations nbd_dbg_tasks_ops = {
+ .open = nbd_dbg_tasks_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int nbd_dbg_flags_show(struct seq_file *s, void *unused)
+{
+ struct nbd_device *nbd = s->private;
+ u32 flags = nbd->flags;
+
+ seq_printf(s, "Hex: 0x%08x\n\n", flags);
+
+ seq_puts(s, "Known flags:\n");
+
+ if (flags & NBD_FLAG_HAS_FLAGS)
+ seq_puts(s, "NBD_FLAG_HAS_FLAGS\n");
+ if (flags & NBD_FLAG_READ_ONLY)
+ seq_puts(s, "NBD_FLAG_READ_ONLY\n");
+ if (flags & NBD_FLAG_SEND_FLUSH)
+ seq_puts(s, "NBD_FLAG_SEND_FLUSH\n");
+ if (flags & NBD_FLAG_SEND_TRIM)
+ seq_puts(s, "NBD_FLAG_SEND_TRIM\n");
+
+ return 0;
+}
+
+static int nbd_dbg_flags_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, nbd_dbg_flags_show, inode->i_private);
+}
+
+static const struct file_operations nbd_dbg_flags_ops = {
+ .open = nbd_dbg_flags_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int nbd_dev_dbg_init(struct nbd_device *nbd)
+{
+ struct dentry *dir;
+ struct dentry *f;
+
+ dir = debugfs_create_dir(nbd_name(nbd), nbd_dbg_dir);
+ if (IS_ERR_OR_NULL(dir)) {
+ dev_err(nbd_to_dev(nbd), "Failed to create debugfs dir for '%s' (%ld)\n",
+ nbd_name(nbd), PTR_ERR(dir));
+ return PTR_ERR(dir);
+ }
+ nbd->dbg_dir = dir;
+
+ f = debugfs_create_file("tasks", 0444, dir, nbd, &nbd_dbg_tasks_ops);
+ if (IS_ERR_OR_NULL(f)) {
+ dev_err(nbd_to_dev(nbd), "Failed to create debugfs file 'tasks', %ld\n",
+ PTR_ERR(f));
+ return PTR_ERR(f);
+ }
+
+ f = debugfs_create_u64("size_bytes", 0444, dir, &nbd->bytesize);
+ if (IS_ERR_OR_NULL(f)) {
+ dev_err(nbd_to_dev(nbd), "Failed to create debugfs file 'size_bytes', %ld\n",
+ PTR_ERR(f));
+ return PTR_ERR(f);
+ }
+
+ f = debugfs_create_u32("timeout", 0444, dir, &nbd->xmit_timeout);
+ if (IS_ERR_OR_NULL(f)) {
+ dev_err(nbd_to_dev(nbd), "Failed to create debugfs file 'timeout', %ld\n",
+ PTR_ERR(f));
+ return PTR_ERR(f);
+ }
+
+ f = debugfs_create_u32("blocksize", 0444, dir, &nbd->blksize);
+ if (IS_ERR_OR_NULL(f)) {
+ dev_err(nbd_to_dev(nbd), "Failed to create debugfs file 'blocksize', %ld\n",
+ PTR_ERR(f));
+ return PTR_ERR(f);
+ }
+
+ f = debugfs_create_file("flags", 0444, dir, &nbd, &nbd_dbg_flags_ops);
+ if (IS_ERR_OR_NULL(f)) {
+ dev_err(nbd_to_dev(nbd), "Failed to create debugfs file 'flags', %ld\n",
+ PTR_ERR(f));
+ return PTR_ERR(f);
+ }
+
+ return 0;
+}
+
+static void nbd_dev_dbg_close(struct nbd_device *nbd)
+{
+ debugfs_remove_recursive(nbd->dbg_dir);
+}
+
+static int nbd_dbg_init(void)
+{
+ struct dentry *dbg_dir;
+
+ dbg_dir = debugfs_create_dir("nbd", NULL);
+ if (IS_ERR(dbg_dir))
+ return PTR_ERR(dbg_dir);
+
+ nbd_dbg_dir = dbg_dir;
+
+ return 0;
+}
+
+static void nbd_dbg_close(void)
+{
+ debugfs_remove_recursive(nbd_dbg_dir);
+}
+
+#else /* IS_ENABLED(CONFIG_DEBUG_FS) */
+
+static int nbd_dev_dbg_init(struct nbd_device *nbd)
+{
+ return 0;
+}
+
+static void nbd_dev_dbg_close(struct nbd_device *nbd)
+{
+}
+
+static int nbd_dbg_init(void)
+{
+ return 0;
+}
+
+static void nbd_dbg_close(void)
+{
+}
+
+#endif
+
/*
* And here should be modules and kernel interface
* (Just smiley confuses emacs :-)
@@ -811,7 +1044,7 @@ static int __init nbd_init(void)
* every gendisk to have its very own request_queue struct.
* These structs are big so we dynamically allocate them.
*/
- disk->queue = blk_init_queue(do_nbd_request, &nbd_lock);
+ disk->queue = blk_init_queue(nbd_request_handler, &nbd_lock);
if (!disk->queue) {
put_disk(disk);
goto out;
@@ -822,7 +1055,7 @@ static int __init nbd_init(void)
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, disk->queue);
queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, disk->queue);
disk->queue->limits.discard_granularity = 512;
- disk->queue->limits.max_discard_sectors = UINT_MAX;
+ blk_queue_max_discard_sectors(disk->queue, UINT_MAX);
disk->queue->limits.discard_zeroes_data = 0;
blk_queue_max_hw_sectors(disk->queue, 65536);
disk->queue->limits.max_sectors = 256;
@@ -835,13 +1068,19 @@ static int __init nbd_init(void)
printk(KERN_INFO "nbd: registered device at major %d\n", NBD_MAJOR);
+ nbd_dbg_init();
+
for (i = 0; i < nbds_max; i++) {
struct gendisk *disk = nbd_dev[i].disk;
nbd_dev[i].magic = NBD_MAGIC;
INIT_LIST_HEAD(&nbd_dev[i].waiting_queue);
spin_lock_init(&nbd_dev[i].queue_lock);
+ spin_lock_init(&nbd_dev[i].tasks_lock);
INIT_LIST_HEAD(&nbd_dev[i].queue_head);
mutex_init(&nbd_dev[i].tx_lock);
+ init_timer(&nbd_dev[i].timeout_timer);
+ nbd_dev[i].timeout_timer.function = nbd_xmit_timeout;
+ nbd_dev[i].timeout_timer.data = (unsigned long)&nbd_dev[i];
init_waitqueue_head(&nbd_dev[i].active_wq);
init_waitqueue_head(&nbd_dev[i].waiting_wq);
nbd_dev[i].blksize = 1024;
@@ -868,6 +1107,9 @@ out:
static void __exit nbd_cleanup(void)
{
int i;
+
+ nbd_dbg_close();
+
for (i = 0; i < nbds_max; i++) {
struct gendisk *disk = nbd_dev[i].disk;
nbd_dev[i].magic = 0;
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 3177b245d..1c9e4fe5a 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -222,7 +222,7 @@ static void end_cmd(struct nullb_cmd *cmd)
blk_end_request_all(cmd->rq, 0);
break;
case NULL_Q_BIO:
- bio_endio(cmd->bio, 0);
+ bio_endio(cmd->bio);
break;
}
@@ -289,7 +289,7 @@ static inline void null_handle_cmd(struct nullb_cmd *cmd)
case NULL_IRQ_SOFTIRQ:
switch (queue_mode) {
case NULL_Q_MQ:
- blk_mq_complete_request(cmd->rq);
+ blk_mq_complete_request(cmd->rq, cmd->rq->errors);
break;
case NULL_Q_RQ:
blk_complete_request(cmd->rq);
@@ -406,6 +406,22 @@ static struct blk_mq_ops null_mq_ops = {
.complete = null_softirq_done_fn,
};
+static void cleanup_queue(struct nullb_queue *nq)
+{
+ kfree(nq->tag_map);
+ kfree(nq->cmds);
+}
+
+static void cleanup_queues(struct nullb *nullb)
+{
+ int i;
+
+ for (i = 0; i < nullb->nr_queues; i++)
+ cleanup_queue(&nullb->queues[i]);
+
+ kfree(nullb->queues);
+}
+
static void null_del_dev(struct nullb *nullb)
{
list_del_init(&nullb->list);
@@ -415,6 +431,7 @@ static void null_del_dev(struct nullb *nullb)
if (queue_mode == NULL_Q_MQ)
blk_mq_free_tag_set(&nullb->tag_set);
put_disk(nullb->disk);
+ cleanup_queues(nullb);
kfree(nullb);
}
@@ -459,22 +476,6 @@ static int setup_commands(struct nullb_queue *nq)
return 0;
}
-static void cleanup_queue(struct nullb_queue *nq)
-{
- kfree(nq->tag_map);
- kfree(nq->cmds);
-}
-
-static void cleanup_queues(struct nullb *nullb)
-{
- int i;
-
- for (i = 0; i < nullb->nr_queues; i++)
- cleanup_queue(&nullb->queues[i]);
-
- kfree(nullb->queues);
-}
-
static int setup_queues(struct nullb *nullb)
{
nullb->queues = kzalloc(submit_queues * sizeof(struct nullb_queue),
@@ -588,8 +589,7 @@ static int null_add_dev(void)
blk_queue_physical_block_size(nullb->q, bs);
size = gb * 1024 * 1024 * 1024ULL;
- sector_div(size, bs);
- set_capacity(disk, size);
+ set_capacity(disk, size >> 9);
disk->flags |= GENHD_FL_EXT_DEVT | GENHD_FL_SUPPRESS_PARTITION_INFO;
disk->major = null_major;
diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 7920c2741..ccc0c1f93 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -72,6 +72,10 @@ module_param(nvme_char_major, int, 0);
static int use_threaded_interrupts;
module_param(use_threaded_interrupts, int, 0);
+static bool use_cmb_sqes = true;
+module_param(use_cmb_sqes, bool, 0644);
+MODULE_PARM_DESC(use_cmb_sqes, "use controller's memory buffer for I/O SQes");
+
static DEFINE_SPINLOCK(dev_list_lock);
static LIST_HEAD(dev_list);
static struct task_struct *nvme_thread;
@@ -103,6 +107,7 @@ struct nvme_queue {
char irqname[24]; /* nvme4294967295-65535\0 */
spinlock_t q_lock;
struct nvme_command *sq_cmds;
+ struct nvme_command __iomem *sq_cmds_io;
volatile struct nvme_completion *cqes;
struct blk_mq_tags **tags;
dma_addr_t sq_dma_addr;
@@ -379,27 +384,28 @@ static void *nvme_finish_cmd(struct nvme_queue *nvmeq, int tag,
*
* Safe to use from interrupt context
*/
-static int __nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd)
+static void __nvme_submit_cmd(struct nvme_queue *nvmeq,
+ struct nvme_command *cmd)
{
u16 tail = nvmeq->sq_tail;
- memcpy(&nvmeq->sq_cmds[tail], cmd, sizeof(*cmd));
+ if (nvmeq->sq_cmds_io)
+ memcpy_toio(&nvmeq->sq_cmds_io[tail], cmd, sizeof(*cmd));
+ else
+ memcpy(&nvmeq->sq_cmds[tail], cmd, sizeof(*cmd));
+
if (++tail == nvmeq->q_depth)
tail = 0;
writel(tail, nvmeq->q_db);
nvmeq->sq_tail = tail;
-
- return 0;
}
-static int nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd)
+static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd)
{
unsigned long flags;
- int ret;
spin_lock_irqsave(&nvmeq->q_lock, flags);
- ret = __nvme_submit_cmd(nvmeq, cmd);
+ __nvme_submit_cmd(nvmeq, cmd);
spin_unlock_irqrestore(&nvmeq->q_lock, flags);
- return ret;
}
static __le64 **iod_list(struct nvme_iod *iod)
@@ -597,31 +603,34 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx,
struct nvme_iod *iod = ctx;
struct request *req = iod_get_private(iod);
struct nvme_cmd_info *cmd_rq = blk_mq_rq_to_pdu(req);
-
u16 status = le16_to_cpup(&cqe->status) >> 1;
+ bool requeue = false;
+ int error = 0;
if (unlikely(status)) {
if (!(status & NVME_SC_DNR || blk_noretry_request(req))
&& (jiffies - req->start_time) < req->timeout) {
unsigned long flags;
+ requeue = true;
blk_mq_requeue_request(req);
spin_lock_irqsave(req->q->queue_lock, flags);
if (!blk_queue_stopped(req->q))
blk_mq_kick_requeue_list(req->q);
spin_unlock_irqrestore(req->q->queue_lock, flags);
- return;
+ goto release_iod;
}
+
if (req->cmd_type == REQ_TYPE_DRV_PRIV) {
if (cmd_rq->ctx == CMD_CTX_CANCELLED)
- req->errors = -EINTR;
+ error = -EINTR;
else
- req->errors = status;
+ error = status;
} else {
- req->errors = nvme_error_status(status);
+ error = nvme_error_status(status);
}
- } else
- req->errors = 0;
+ }
+
if (req->cmd_type == REQ_TYPE_DRV_PRIV) {
u32 result = le32_to_cpup(&cqe->result);
req->special = (void *)(uintptr_t)result;
@@ -630,8 +639,9 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx,
if (cmd_rq->aborted)
dev_warn(nvmeq->dev->dev,
"completing aborted command with status:%04x\n",
- status);
+ error);
+release_iod:
if (iod->nents) {
dma_unmap_sg(nvmeq->dev->dev, iod->sg, iod->nents,
rq_data_dir(req) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
@@ -644,7 +654,8 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx,
}
nvme_free_iod(nvmeq->dev, iod);
- blk_mq_complete_request(req);
+ if (likely(!requeue))
+ blk_mq_complete_request(req, error);
}
/* length is in bytes. gfp flags indicates whether we may sleep. */
@@ -730,18 +741,16 @@ static int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod,
static void nvme_submit_priv(struct nvme_queue *nvmeq, struct request *req,
struct nvme_iod *iod)
{
- struct nvme_command *cmnd = &nvmeq->sq_cmds[nvmeq->sq_tail];
+ struct nvme_command cmnd;
- memcpy(cmnd, req->cmd, sizeof(struct nvme_command));
- cmnd->rw.command_id = req->tag;
+ memcpy(&cmnd, req->cmd, sizeof(cmnd));
+ cmnd.rw.command_id = req->tag;
if (req->nr_phys_segments) {
- cmnd->rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
- cmnd->rw.prp2 = cpu_to_le64(iod->first_dma);
+ cmnd.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
+ cmnd.rw.prp2 = cpu_to_le64(iod->first_dma);
}
- if (++nvmeq->sq_tail == nvmeq->q_depth)
- nvmeq->sq_tail = 0;
- writel(nvmeq->sq_tail, nvmeq->q_db);
+ __nvme_submit_cmd(nvmeq, &cmnd);
}
/*
@@ -754,45 +763,41 @@ static void nvme_submit_discard(struct nvme_queue *nvmeq, struct nvme_ns *ns,
{
struct nvme_dsm_range *range =
(struct nvme_dsm_range *)iod_list(iod)[0];
- struct nvme_command *cmnd = &nvmeq->sq_cmds[nvmeq->sq_tail];
+ struct nvme_command cmnd;
range->cattr = cpu_to_le32(0);
range->nlb = cpu_to_le32(blk_rq_bytes(req) >> ns->lba_shift);
range->slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
- memset(cmnd, 0, sizeof(*cmnd));
- cmnd->dsm.opcode = nvme_cmd_dsm;
- cmnd->dsm.command_id = req->tag;
- cmnd->dsm.nsid = cpu_to_le32(ns->ns_id);
- cmnd->dsm.prp1 = cpu_to_le64(iod->first_dma);
- cmnd->dsm.nr = 0;
- cmnd->dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);
+ memset(&cmnd, 0, sizeof(cmnd));
+ cmnd.dsm.opcode = nvme_cmd_dsm;
+ cmnd.dsm.command_id = req->tag;
+ cmnd.dsm.nsid = cpu_to_le32(ns->ns_id);
+ cmnd.dsm.prp1 = cpu_to_le64(iod->first_dma);
+ cmnd.dsm.nr = 0;
+ cmnd.dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);
- if (++nvmeq->sq_tail == nvmeq->q_depth)
- nvmeq->sq_tail = 0;
- writel(nvmeq->sq_tail, nvmeq->q_db);
+ __nvme_submit_cmd(nvmeq, &cmnd);
}
static void nvme_submit_flush(struct nvme_queue *nvmeq, struct nvme_ns *ns,
int cmdid)
{
- struct nvme_command *cmnd = &nvmeq->sq_cmds[nvmeq->sq_tail];
+ struct nvme_command cmnd;
- memset(cmnd, 0, sizeof(*cmnd));
- cmnd->common.opcode = nvme_cmd_flush;
- cmnd->common.command_id = cmdid;
- cmnd->common.nsid = cpu_to_le32(ns->ns_id);
+ memset(&cmnd, 0, sizeof(cmnd));
+ cmnd.common.opcode = nvme_cmd_flush;
+ cmnd.common.command_id = cmdid;
+ cmnd.common.nsid = cpu_to_le32(ns->ns_id);
- if (++nvmeq->sq_tail == nvmeq->q_depth)
- nvmeq->sq_tail = 0;
- writel(nvmeq->sq_tail, nvmeq->q_db);
+ __nvme_submit_cmd(nvmeq, &cmnd);
}
static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod,
struct nvme_ns *ns)
{
struct request *req = iod_get_private(iod);
- struct nvme_command *cmnd;
+ struct nvme_command cmnd;
u16 control = 0;
u32 dsmgmt = 0;
@@ -804,19 +809,16 @@ static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod,
if (req->cmd_flags & REQ_RAHEAD)
dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH;
- cmnd = &nvmeq->sq_cmds[nvmeq->sq_tail];
- memset(cmnd, 0, sizeof(*cmnd));
-
- cmnd->rw.opcode = (rq_data_dir(req) ? nvme_cmd_write : nvme_cmd_read);
- cmnd->rw.command_id = req->tag;
- cmnd->rw.nsid = cpu_to_le32(ns->ns_id);
- cmnd->rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
- cmnd->rw.prp2 = cpu_to_le64(iod->first_dma);
- cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
- cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
+ memset(&cmnd, 0, sizeof(cmnd));
+ cmnd.rw.opcode = (rq_data_dir(req) ? nvme_cmd_write : nvme_cmd_read);
+ cmnd.rw.command_id = req->tag;
+ cmnd.rw.nsid = cpu_to_le32(ns->ns_id);
+ cmnd.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
+ cmnd.rw.prp2 = cpu_to_le64(iod->first_dma);
+ cmnd.rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
+ cmnd.rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
- if (blk_integrity_rq(req)) {
- cmnd->rw.metadata = cpu_to_le64(sg_dma_address(iod->meta_sg));
+ if (ns->ms) {
switch (ns->pi_type) {
case NVME_NS_DPS_PI_TYPE3:
control |= NVME_RW_PRINFO_PRCHK_GUARD;
@@ -825,19 +827,21 @@ static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod,
case NVME_NS_DPS_PI_TYPE2:
control |= NVME_RW_PRINFO_PRCHK_GUARD |
NVME_RW_PRINFO_PRCHK_REF;
- cmnd->rw.reftag = cpu_to_le32(
+ cmnd.rw.reftag = cpu_to_le32(
nvme_block_nr(ns, blk_rq_pos(req)));
break;
}
- } else if (ns->ms)
- control |= NVME_RW_PRINFO_PRACT;
+ if (blk_integrity_rq(req))
+ cmnd.rw.metadata =
+ cpu_to_le64(sg_dma_address(iod->meta_sg));
+ else
+ control |= NVME_RW_PRINFO_PRACT;
+ }
- cmnd->rw.control = cpu_to_le16(control);
- cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt);
+ cmnd.rw.control = cpu_to_le16(control);
+ cmnd.rw.dsmgmt = cpu_to_le32(dsmgmt);
- if (++nvmeq->sq_tail == nvmeq->q_depth)
- nvmeq->sq_tail = 0;
- writel(nvmeq->sq_tail, nvmeq->q_db);
+ __nvme_submit_cmd(nvmeq, &cmnd);
return 0;
}
@@ -864,8 +868,7 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
if (ns && ns->ms && !blk_integrity_rq(req)) {
if (!(ns->pi_type && ns->ms == 8) &&
req->cmd_type != REQ_TYPE_DRV_PRIV) {
- req->errors = -EFAULT;
- blk_mq_complete_request(req);
+ blk_mq_complete_request(req, -EFAULT);
return BLK_MQ_RQ_QUEUE_OK;
}
}
@@ -1080,7 +1083,8 @@ static int nvme_submit_async_admin_req(struct nvme_dev *dev)
c.common.command_id = req->tag;
blk_mq_free_request(req);
- return __nvme_submit_cmd(nvmeq, &c);
+ __nvme_submit_cmd(nvmeq, &c);
+ return 0;
}
static int nvme_submit_admin_async_cmd(struct nvme_dev *dev,
@@ -1103,7 +1107,8 @@ static int nvme_submit_admin_async_cmd(struct nvme_dev *dev,
cmd->common.command_id = req->tag;
- return nvme_submit_cmd(nvmeq, cmd);
+ nvme_submit_cmd(nvmeq, cmd);
+ return 0;
}
static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
@@ -1315,12 +1320,7 @@ static void nvme_abort_req(struct request *req)
dev_warn(nvmeq->q_dmadev, "Aborting I/O %d QID %d\n", req->tag,
nvmeq->qid);
- if (nvme_submit_cmd(dev->queues[0], &cmd) < 0) {
- dev_warn(nvmeq->q_dmadev,
- "Could not abort I/O %d QID %d",
- req->tag, nvmeq->qid);
- blk_mq_free_request(abort_req);
- }
+ nvme_submit_cmd(dev->queues[0], &cmd);
}
static void nvme_cancel_queue_ios(struct request *req, void *data, bool reserved)
@@ -1374,7 +1374,8 @@ static void nvme_free_queue(struct nvme_queue *nvmeq)
{
dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
(void *)nvmeq->cqes, nvmeq->cq_dma_addr);
- dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
+ if (nvmeq->sq_cmds)
+ dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
nvmeq->sq_cmds, nvmeq->sq_dma_addr);
kfree(nvmeq);
}
@@ -1447,6 +1448,47 @@ static void nvme_disable_queue(struct nvme_dev *dev, int qid)
spin_unlock_irq(&nvmeq->q_lock);
}
+static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues,
+ int entry_size)
+{
+ int q_depth = dev->q_depth;
+ unsigned q_size_aligned = roundup(q_depth * entry_size, dev->page_size);
+
+ if (q_size_aligned * nr_io_queues > dev->cmb_size) {
+ u64 mem_per_q = div_u64(dev->cmb_size, nr_io_queues);
+ mem_per_q = round_down(mem_per_q, dev->page_size);
+ q_depth = div_u64(mem_per_q, entry_size);
+
+ /*
+ * Ensure the reduced q_depth is above some threshold where it
+ * would be better to map queues in system memory with the
+ * original depth
+ */
+ if (q_depth < 64)
+ return -ENOMEM;
+ }
+
+ return q_depth;
+}
+
+static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
+ int qid, int depth)
+{
+ if (qid && dev->cmb && use_cmb_sqes && NVME_CMB_SQS(dev->cmbsz)) {
+ unsigned offset = (qid - 1) *
+ roundup(SQ_SIZE(depth), dev->page_size);
+ nvmeq->sq_dma_addr = dev->cmb_dma_addr + offset;
+ nvmeq->sq_cmds_io = dev->cmb + offset;
+ } else {
+ nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
+ &nvmeq->sq_dma_addr, GFP_KERNEL);
+ if (!nvmeq->sq_cmds)
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
int depth)
{
@@ -1459,9 +1501,7 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
if (!nvmeq->cqes)
goto free_nvmeq;
- nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
- &nvmeq->sq_dma_addr, GFP_KERNEL);
- if (!nvmeq->sq_cmds)
+ if (nvme_alloc_sq_cmds(dev, nvmeq, qid, depth))
goto free_cqdma;
nvmeq->q_dmadev = dev->dev;
@@ -1696,6 +1736,12 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
page_shift = dev_page_max;
}
+ dev->subsystem = readl(&dev->bar->vs) >= NVME_VS(1, 1) ?
+ NVME_CAP_NSSRC(cap) : 0;
+
+ if (dev->subsystem && (readl(&dev->bar->csts) & NVME_CSTS_NSSRO))
+ writel(NVME_CSTS_NSSRO, &dev->bar->csts);
+
result = nvme_disable_ctrl(dev, cap);
if (result < 0)
return result;
@@ -1764,7 +1810,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
length = (io.nblocks + 1) << ns->lba_shift;
meta_len = (io.nblocks + 1) * ns->ms;
- metadata = (void __user *)(unsigned long)io.metadata;
+ metadata = (void __user *)(uintptr_t)io.metadata;
write = io.opcode & 1;
if (ns->ext) {
@@ -1804,7 +1850,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
c.rw.metadata = cpu_to_le64(meta_dma);
status = __nvme_submit_sync_cmd(ns->queue, &c, NULL,
- (void __user *)io.addr, length, NULL, 0);
+ (void __user *)(uintptr_t)io.addr, length, NULL, 0);
unmap:
if (meta) {
if (status == NVME_SC_SUCCESS && !write) {
@@ -1846,7 +1892,7 @@ static int nvme_user_cmd(struct nvme_dev *dev, struct nvme_ns *ns,
timeout = msecs_to_jiffies(cmd.timeout_ms);
status = __nvme_submit_sync_cmd(ns ? ns->queue : dev->admin_q, &c,
- NULL, (void __user *)cmd.addr, cmd.data_len,
+ NULL, (void __user *)(uintptr_t)cmd.addr, cmd.data_len,
&cmd.result, timeout);
if (status >= 0) {
if (put_user(cmd.result, &ucmd->result))
@@ -1856,6 +1902,15 @@ static int nvme_user_cmd(struct nvme_dev *dev, struct nvme_ns *ns,
return status;
}
+static int nvme_subsys_reset(struct nvme_dev *dev)
+{
+ if (!dev->subsystem)
+ return -ENOTTY;
+
+ writel(0x4E564D65, &dev->bar->nssr); /* "NVMe" */
+ return 0;
+}
+
static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
unsigned long arg)
{
@@ -1935,7 +1990,7 @@ static void nvme_config_discard(struct nvme_ns *ns)
ns->queue->limits.discard_zeroes_data = 0;
ns->queue->limits.discard_alignment = logical_block_size;
ns->queue->limits.discard_granularity = logical_block_size;
- ns->queue->limits.max_discard_sectors = 0xffffffff;
+ blk_queue_max_discard_sectors(ns->queue, 0xffffffff);
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue);
}
@@ -1989,7 +2044,7 @@ static int nvme_revalidate_disk(struct gendisk *disk)
!ns->ext)
nvme_init_integrity(ns);
- if (ns->ms && !blk_get_integrity(disk))
+ if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk))
set_capacity(disk, 0);
else
set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
@@ -2020,7 +2075,10 @@ static int nvme_kthread(void *data)
spin_lock(&dev_list_lock);
list_for_each_entry_safe(dev, next, &dev_list, node) {
int i;
- if (readl(&dev->bar->csts) & NVME_CSTS_CFS) {
+ u32 csts = readl(&dev->bar->csts);
+
+ if ((dev->subsystem && (csts & NVME_CSTS_NSSRO)) ||
+ csts & NVME_CSTS_CFS) {
if (work_busy(&dev->reset_work))
continue;
list_del_init(&dev->node);
@@ -2067,7 +2125,6 @@ static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid)
goto out_free_ns;
queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, ns->queue);
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue);
- queue_flag_set_unlocked(QUEUE_FLAG_SG_GAPS, ns->queue);
ns->dev = dev;
ns->queue->queuedata = ns;
@@ -2081,12 +2138,16 @@ static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid)
list_add_tail(&ns->list, &dev->namespaces);
blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
- if (dev->max_hw_sectors)
+ if (dev->max_hw_sectors) {
blk_queue_max_hw_sectors(ns->queue, dev->max_hw_sectors);
+ blk_queue_max_segments(ns->queue,
+ ((dev->max_hw_sectors << 9) / dev->page_size) + 1);
+ }
if (dev->stripe_size)
blk_queue_chunk_sectors(ns->queue, dev->stripe_size >> 9);
if (dev->vwc & NVME_CTRL_VWC_PRESENT)
blk_queue_flush(ns->queue, REQ_FLUSH | REQ_FUA);
+ blk_queue_virt_boundary(ns->queue, dev->page_size - 1);
disk->major = nvme_major;
disk->first_minor = 0;
@@ -2159,6 +2220,58 @@ static int set_queue_count(struct nvme_dev *dev, int count)
return min(result & 0xffff, result >> 16) + 1;
}
+static void __iomem *nvme_map_cmb(struct nvme_dev *dev)
+{
+ u64 szu, size, offset;
+ u32 cmbloc;
+ resource_size_t bar_size;
+ struct pci_dev *pdev = to_pci_dev(dev->dev);
+ void __iomem *cmb;
+ dma_addr_t dma_addr;
+
+ if (!use_cmb_sqes)
+ return NULL;
+
+ dev->cmbsz = readl(&dev->bar->cmbsz);
+ if (!(NVME_CMB_SZ(dev->cmbsz)))
+ return NULL;
+
+ cmbloc = readl(&dev->bar->cmbloc);
+
+ szu = (u64)1 << (12 + 4 * NVME_CMB_SZU(dev->cmbsz));
+ size = szu * NVME_CMB_SZ(dev->cmbsz);
+ offset = szu * NVME_CMB_OFST(cmbloc);
+ bar_size = pci_resource_len(pdev, NVME_CMB_BIR(cmbloc));
+
+ if (offset > bar_size)
+ return NULL;
+
+ /*
+ * Controllers may support a CMB size larger than their BAR,
+ * for example, due to being behind a bridge. Reduce the CMB to
+ * the reported size of the BAR
+ */
+ if (size > bar_size - offset)
+ size = bar_size - offset;
+
+ dma_addr = pci_resource_start(pdev, NVME_CMB_BIR(cmbloc)) + offset;
+ cmb = ioremap_wc(dma_addr, size);
+ if (!cmb)
+ return NULL;
+
+ dev->cmb_dma_addr = dma_addr;
+ dev->cmb_size = size;
+ return cmb;
+}
+
+static inline void nvme_release_cmb(struct nvme_dev *dev)
+{
+ if (dev->cmb) {
+ iounmap(dev->cmb);
+ dev->cmb = NULL;
+ }
+}
+
static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues)
{
return 4096 + ((nr_io_queues + 1) * 8 * dev->db_stride);
@@ -2177,6 +2290,15 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
if (result < nr_io_queues)
nr_io_queues = result;
+ if (dev->cmb && NVME_CMB_SQS(dev->cmbsz)) {
+ result = nvme_cmb_qdepth(dev, nr_io_queues,
+ sizeof(struct nvme_command));
+ if (result > 0)
+ dev->q_depth = result;
+ else
+ nvme_release_cmb(dev);
+ }
+
size = db_bar_size(dev, nr_io_queues);
if (size > 8192) {
iounmap(dev->bar);
@@ -2321,6 +2443,22 @@ static void nvme_scan_namespaces(struct nvme_dev *dev, unsigned nn)
list_sort(NULL, &dev->namespaces, ns_cmp);
}
+static void nvme_set_irq_hints(struct nvme_dev *dev)
+{
+ struct nvme_queue *nvmeq;
+ int i;
+
+ for (i = 0; i < dev->online_queues; i++) {
+ nvmeq = dev->queues[i];
+
+ if (!nvmeq->tags || !(*nvmeq->tags))
+ continue;
+
+ irq_set_affinity_hint(dev->entry[nvmeq->cq_vector].vector,
+ blk_mq_tags_cpumask(*nvmeq->tags));
+ }
+}
+
static void nvme_dev_scan(struct work_struct *work)
{
struct nvme_dev *dev = container_of(work, struct nvme_dev, scan_work);
@@ -2332,6 +2470,7 @@ static void nvme_dev_scan(struct work_struct *work)
return;
nvme_scan_namespaces(dev, le32_to_cpup(&ctrl->nn));
kfree(ctrl);
+ nvme_set_irq_hints(dev);
}
/*
@@ -2344,7 +2483,6 @@ static int nvme_dev_add(struct nvme_dev *dev)
{
struct pci_dev *pdev = to_pci_dev(dev->dev);
int res;
- unsigned nn;
struct nvme_id_ctrl *ctrl;
int shift = NVME_CAP_MPSMIN(readq(&dev->bar->cap)) + 12;
@@ -2354,7 +2492,6 @@ static int nvme_dev_add(struct nvme_dev *dev)
return -EIO;
}
- nn = le32_to_cpup(&ctrl->nn);
dev->oncs = le16_to_cpup(&ctrl->oncs);
dev->abort_limit = ctrl->acl + 1;
dev->vwc = ctrl->vwc;
@@ -2440,6 +2577,8 @@ static int nvme_dev_map(struct nvme_dev *dev)
dev->q_depth = min_t(int, NVME_CAP_MQES(cap) + 1, NVME_Q_DEPTH);
dev->db_stride = 1 << NVME_CAP_STRIDE(cap);
dev->dbs = ((void __iomem *)dev->bar) + 4096;
+ if (readl(&dev->bar->vs) >= NVME_VS(1, 2))
+ dev->cmb = nvme_map_cmb(dev);
return 0;
@@ -2820,6 +2959,8 @@ static long nvme_dev_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
case NVME_IOCTL_RESET:
dev_warn(dev->dev, "resetting controller\n");
return nvme_reset(dev);
+ case NVME_IOCTL_SUBSYS_RESET:
+ return nvme_subsys_reset(dev);
default:
return -ENOTTY;
}
@@ -2833,22 +2974,6 @@ static const struct file_operations nvme_dev_fops = {
.compat_ioctl = nvme_dev_ioctl,
};
-static void nvme_set_irq_hints(struct nvme_dev *dev)
-{
- struct nvme_queue *nvmeq;
- int i;
-
- for (i = 0; i < dev->online_queues; i++) {
- nvmeq = dev->queues[i];
-
- if (!nvmeq->tags || !(*nvmeq->tags))
- continue;
-
- irq_set_affinity_hint(dev->entry[nvmeq->cq_vector].vector,
- blk_mq_tags_cpumask(*nvmeq->tags));
- }
-}
-
static int nvme_dev_start(struct nvme_dev *dev)
{
int result;
@@ -2890,8 +3015,6 @@ static int nvme_dev_start(struct nvme_dev *dev)
if (result)
goto free_tags;
- nvme_set_irq_hints(dev);
-
dev->event_limit = 1;
return result;
@@ -2942,7 +3065,6 @@ static int nvme_dev_resume(struct nvme_dev *dev)
} else {
nvme_unfreeze_queues(dev);
nvme_dev_add(dev);
- nvme_set_irq_hints(dev);
}
return 0;
}
@@ -3145,6 +3267,7 @@ static void nvme_remove(struct pci_dev *pdev)
nvme_dev_remove_admin(dev);
device_destroy(nvme_class, MKDEV(nvme_char_major, dev->instance));
nvme_free_queues(dev, 0);
+ nvme_release_cmb(dev);
nvme_release_prp_pools(dev);
kref_put(&dev->kref, nvme_free_dev);
}
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 4c20c2281..7be2375db 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -977,7 +977,7 @@ static void pkt_make_local_copy(struct packet_data *pkt, struct bio_vec *bvec)
}
}
-static void pkt_end_io_read(struct bio *bio, int err)
+static void pkt_end_io_read(struct bio *bio)
{
struct packet_data *pkt = bio->bi_private;
struct pktcdvd_device *pd = pkt->pd;
@@ -985,9 +985,9 @@ static void pkt_end_io_read(struct bio *bio, int err)
pkt_dbg(2, pd, "bio=%p sec0=%llx sec=%llx err=%d\n",
bio, (unsigned long long)pkt->sector,
- (unsigned long long)bio->bi_iter.bi_sector, err);
+ (unsigned long long)bio->bi_iter.bi_sector, bio->bi_error);
- if (err)
+ if (bio->bi_error)
atomic_inc(&pkt->io_errors);
if (atomic_dec_and_test(&pkt->io_wait)) {
atomic_inc(&pkt->run_sm);
@@ -996,13 +996,13 @@ static void pkt_end_io_read(struct bio *bio, int err)
pkt_bio_finished(pd);
}
-static void pkt_end_io_packet_write(struct bio *bio, int err)
+static void pkt_end_io_packet_write(struct bio *bio)
{
struct packet_data *pkt = bio->bi_private;
struct pktcdvd_device *pd = pkt->pd;
BUG_ON(!pd);
- pkt_dbg(2, pd, "id=%d, err=%d\n", pkt->id, err);
+ pkt_dbg(2, pd, "id=%d, err=%d\n", pkt->id, bio->bi_error);
pd->stats.pkt_ended++;
@@ -1340,22 +1340,22 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
pkt_queue_bio(pd, pkt->w_bio);
}
-static void pkt_finish_packet(struct packet_data *pkt, int uptodate)
+static void pkt_finish_packet(struct packet_data *pkt, int error)
{
struct bio *bio;
- if (!uptodate)
+ if (error)
pkt->cache_valid = 0;
/* Finish all bios corresponding to this packet */
- while ((bio = bio_list_pop(&pkt->orig_bios)))
- bio_endio(bio, uptodate ? 0 : -EIO);
+ while ((bio = bio_list_pop(&pkt->orig_bios))) {
+ bio->bi_error = error;
+ bio_endio(bio);
+ }
}
static void pkt_run_state_machine(struct pktcdvd_device *pd, struct packet_data *pkt)
{
- int uptodate;
-
pkt_dbg(2, pd, "pkt %d\n", pkt->id);
for (;;) {
@@ -1384,7 +1384,7 @@ static void pkt_run_state_machine(struct pktcdvd_device *pd, struct packet_data
if (atomic_read(&pkt->io_wait) > 0)
return;
- if (test_bit(BIO_UPTODATE, &pkt->w_bio->bi_flags)) {
+ if (!pkt->w_bio->bi_error) {
pkt_set_state(pkt, PACKET_FINISHED_STATE);
} else {
pkt_set_state(pkt, PACKET_RECOVERY_STATE);
@@ -1401,8 +1401,7 @@ static void pkt_run_state_machine(struct pktcdvd_device *pd, struct packet_data
break;
case PACKET_FINISHED_STATE:
- uptodate = test_bit(BIO_UPTODATE, &pkt->w_bio->bi_flags);
- pkt_finish_packet(pkt, uptodate);
+ pkt_finish_packet(pkt, pkt->w_bio->bi_error);
return;
default:
@@ -2332,13 +2331,14 @@ static void pkt_close(struct gendisk *disk, fmode_t mode)
}
-static void pkt_end_io_read_cloned(struct bio *bio, int err)
+static void pkt_end_io_read_cloned(struct bio *bio)
{
struct packet_stacked_data *psd = bio->bi_private;
struct pktcdvd_device *pd = psd->pd;
+ psd->bio->bi_error = bio->bi_error;
bio_put(bio);
- bio_endio(psd->bio, err);
+ bio_endio(psd->bio);
mempool_free(psd, psd_pool);
pkt_bio_finished(pd);
}
@@ -2447,6 +2447,10 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio)
char b[BDEVNAME_SIZE];
struct bio *split;
+ blk_queue_bounce(q, &bio);
+
+ blk_queue_split(q, &bio, q->bio_split);
+
pd = q->queuedata;
if (!pd) {
pr_err("%s incorrect request queue\n",
@@ -2477,8 +2481,6 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio)
goto end_io;
}
- blk_queue_bounce(q, &bio);
-
do {
sector_t zone = get_zone(bio->bi_iter.bi_sector, pd);
sector_t last_zone = get_zone(bio_end_sector(bio) - 1, pd);
@@ -2504,26 +2506,6 @@ end_io:
-static int pkt_merge_bvec(struct request_queue *q, struct bvec_merge_data *bmd,
- struct bio_vec *bvec)
-{
- struct pktcdvd_device *pd = q->queuedata;
- sector_t zone = get_zone(bmd->bi_sector, pd);
- int used = ((bmd->bi_sector - zone) << 9) + bmd->bi_size;
- int remaining = (pd->settings.size << 9) - used;
- int remaining2;
-
- /*
- * A bio <= PAGE_SIZE must be allowed. If it crosses a packet
- * boundary, pkt_make_request() will split the bio.
- */
- remaining2 = PAGE_SIZE - bmd->bi_size;
- remaining = max(remaining, remaining2);
-
- BUG_ON(remaining < 0);
- return remaining;
-}
-
static void pkt_init_queue(struct pktcdvd_device *pd)
{
struct request_queue *q = pd->disk->queue;
@@ -2531,7 +2513,6 @@ static void pkt_init_queue(struct pktcdvd_device *pd)
blk_queue_make_request(q, pkt_make_request);
blk_queue_logical_block_size(q, CD_FRAMESIZE);
blk_queue_max_hw_sectors(q, PACKET_MAX_SECTORS);
- blk_queue_merge_bvec(q, pkt_merge_bvec);
q->queuedata = pd;
}
diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c
index b1612eb16..d89fcac59 100644
--- a/drivers/block/ps3vram.c
+++ b/drivers/block/ps3vram.c
@@ -593,7 +593,8 @@ out:
next = bio_list_peek(&priv->list);
spin_unlock_irq(&priv->lock);
- bio_endio(bio, error);
+ bio->bi_error = error;
+ bio_endio(bio);
return next;
}
@@ -605,6 +606,8 @@ static void ps3vram_make_request(struct request_queue *q, struct bio *bio)
dev_dbg(&dev->core, "%s\n", __func__);
+ blk_queue_split(q, &bio, q->bio_split);
+
spin_lock_irq(&priv->lock);
busy = !bio_list_empty(&priv->list);
bio_list_add(&priv->list, bio);
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 324bf35ec..128e7df5b 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -96,6 +96,8 @@ static int atomic_dec_return_safe(atomic_t *v)
#define RBD_MINORS_PER_MAJOR 256
#define RBD_SINGLE_MAJOR_PART_SHIFT 4
+#define RBD_MAX_PARENT_CHAIN_LEN 16
+
#define RBD_SNAP_DEV_NAME_PREFIX "snap_"
#define RBD_MAX_SNAP_NAME_LEN \
(NAME_MAX - (sizeof (RBD_SNAP_DEV_NAME_PREFIX) - 1))
@@ -426,7 +428,7 @@ static ssize_t rbd_add_single_major(struct bus_type *bus, const char *buf,
size_t count);
static ssize_t rbd_remove_single_major(struct bus_type *bus, const char *buf,
size_t count);
-static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping);
+static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth);
static void rbd_spec_put(struct rbd_spec *spec);
static int rbd_dev_id_to_minor(int dev_id)
@@ -1863,9 +1865,11 @@ static void rbd_osd_req_callback(struct ceph_osd_request *osd_req,
rbd_osd_read_callback(obj_request);
break;
case CEPH_OSD_OP_SETALLOCHINT:
- rbd_assert(osd_req->r_ops[1].op == CEPH_OSD_OP_WRITE);
+ rbd_assert(osd_req->r_ops[1].op == CEPH_OSD_OP_WRITE ||
+ osd_req->r_ops[1].op == CEPH_OSD_OP_WRITEFULL);
/* fall through */
case CEPH_OSD_OP_WRITE:
+ case CEPH_OSD_OP_WRITEFULL:
rbd_osd_write_callback(obj_request);
break;
case CEPH_OSD_OP_STAT:
@@ -2401,7 +2405,10 @@ static void rbd_img_obj_request_fill(struct rbd_obj_request *obj_request,
opcode = CEPH_OSD_OP_ZERO;
}
} else if (op_type == OBJ_OP_WRITE) {
- opcode = CEPH_OSD_OP_WRITE;
+ if (!offset && length == object_size)
+ opcode = CEPH_OSD_OP_WRITEFULL;
+ else
+ opcode = CEPH_OSD_OP_WRITE;
osd_req_op_alloc_hint_init(osd_request, num_ops,
object_size, object_size);
num_ops++;
@@ -3474,52 +3481,6 @@ static int rbd_queue_rq(struct blk_mq_hw_ctx *hctx,
return BLK_MQ_RQ_QUEUE_OK;
}
-/*
- * a queue callback. Makes sure that we don't create a bio that spans across
- * multiple osd objects. One exception would be with a single page bios,
- * which we handle later at bio_chain_clone_range()
- */
-static int rbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bmd,
- struct bio_vec *bvec)
-{
- struct rbd_device *rbd_dev = q->queuedata;
- sector_t sector_offset;
- sector_t sectors_per_obj;
- sector_t obj_sector_offset;
- int ret;
-
- /*
- * Find how far into its rbd object the partition-relative
- * bio start sector is to offset relative to the enclosing
- * device.
- */
- sector_offset = get_start_sect(bmd->bi_bdev) + bmd->bi_sector;
- sectors_per_obj = 1 << (rbd_dev->header.obj_order - SECTOR_SHIFT);
- obj_sector_offset = sector_offset & (sectors_per_obj - 1);
-
- /*
- * Compute the number of bytes from that offset to the end
- * of the object. Account for what's already used by the bio.
- */
- ret = (int) (sectors_per_obj - obj_sector_offset) << SECTOR_SHIFT;
- if (ret > bmd->bi_size)
- ret -= bmd->bi_size;
- else
- ret = 0;
-
- /*
- * Don't send back more than was asked for. And if the bio
- * was empty, let the whole thing through because: "Note
- * that a block device *must* allow a single page to be
- * added to an empty bio."
- */
- rbd_assert(bvec->bv_len <= PAGE_SIZE);
- if (ret > (int) bvec->bv_len || !bmd->bi_size)
- ret = (int) bvec->bv_len;
-
- return ret;
-}
-
static void rbd_free_disk(struct rbd_device *rbd_dev)
{
struct gendisk *disk = rbd_dev->disk;
@@ -3806,6 +3767,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
/* set io sizes to object size */
segment_size = rbd_obj_bytes(&rbd_dev->header);
blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE);
+ q->limits.max_sectors = queue_max_hw_sectors(q);
blk_queue_max_segments(q, segment_size / SECTOR_SIZE);
blk_queue_max_segment_size(q, segment_size);
blk_queue_io_min(q, segment_size);
@@ -3815,10 +3777,12 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
q->limits.discard_granularity = segment_size;
q->limits.discard_alignment = segment_size;
- q->limits.max_discard_sectors = segment_size / SECTOR_SIZE;
+ blk_queue_max_discard_sectors(q, segment_size / SECTOR_SIZE);
q->limits.discard_zeroes_data = 1;
- blk_queue_merge_bvec(q, rbd_merge_bvec);
+ if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC))
+ q->backing_dev_info.capabilities |= BDI_CAP_STABLE_WRITES;
+
disk->queue = q;
q->queuedata = rbd_dev;
@@ -4720,7 +4684,10 @@ static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev)
}
ret = rbd_dev_v2_snap_context(rbd_dev);
- dout("rbd_dev_v2_snap_context returned %d\n", ret);
+ if (ret && first_time) {
+ kfree(rbd_dev->header.object_prefix);
+ rbd_dev->header.object_prefix = NULL;
+ }
return ret;
}
@@ -5169,44 +5136,51 @@ out_err:
return ret;
}
-static int rbd_dev_probe_parent(struct rbd_device *rbd_dev)
+/*
+ * @depth is rbd_dev_image_probe() -> rbd_dev_probe_parent() ->
+ * rbd_dev_image_probe() recursion depth, which means it's also the
+ * length of the already discovered part of the parent chain.
+ */
+static int rbd_dev_probe_parent(struct rbd_device *rbd_dev, int depth)
{
struct rbd_device *parent = NULL;
- struct rbd_spec *parent_spec;
- struct rbd_client *rbdc;
int ret;
if (!rbd_dev->parent_spec)
return 0;
- /*
- * We need to pass a reference to the client and the parent
- * spec when creating the parent rbd_dev. Images related by
- * parent/child relationships always share both.
- */
- parent_spec = rbd_spec_get(rbd_dev->parent_spec);
- rbdc = __rbd_get_client(rbd_dev->rbd_client);
- ret = -ENOMEM;
- parent = rbd_dev_create(rbdc, parent_spec, NULL);
- if (!parent)
+ if (++depth > RBD_MAX_PARENT_CHAIN_LEN) {
+ pr_info("parent chain is too long (%d)\n", depth);
+ ret = -EINVAL;
goto out_err;
+ }
- ret = rbd_dev_image_probe(parent, false);
+ parent = rbd_dev_create(rbd_dev->rbd_client, rbd_dev->parent_spec,
+ NULL);
+ if (!parent) {
+ ret = -ENOMEM;
+ goto out_err;
+ }
+
+ /*
+ * Images related by parent/child relationships always share
+ * rbd_client and spec/parent_spec, so bump their refcounts.
+ */
+ __rbd_get_client(rbd_dev->rbd_client);
+ rbd_spec_get(rbd_dev->parent_spec);
+
+ ret = rbd_dev_image_probe(parent, depth);
if (ret < 0)
goto out_err;
+
rbd_dev->parent = parent;
atomic_set(&rbd_dev->parent_ref, 1);
-
return 0;
+
out_err:
- if (parent) {
- rbd_dev_unparent(rbd_dev);
+ rbd_dev_unparent(rbd_dev);
+ if (parent)
rbd_dev_destroy(parent);
- } else {
- rbd_put_client(rbdc);
- rbd_spec_put(parent_spec);
- }
-
return ret;
}
@@ -5324,7 +5298,7 @@ static void rbd_dev_image_release(struct rbd_device *rbd_dev)
* parent), initiate a watch on its header object before using that
* object to get detailed information about the rbd image.
*/
-static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
+static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
{
int ret;
@@ -5342,7 +5316,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
if (ret)
goto err_out_format;
- if (mapping) {
+ if (!depth) {
ret = rbd_dev_header_watch_sync(rbd_dev);
if (ret) {
if (ret == -ENOENT)
@@ -5363,7 +5337,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
* Otherwise this is a parent image, identified by pool, image
* and snap ids - need to fill in names for those ids.
*/
- if (mapping)
+ if (!depth)
ret = rbd_spec_fill_snap_id(rbd_dev);
else
ret = rbd_spec_fill_names(rbd_dev);
@@ -5385,12 +5359,12 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
* Need to warn users if this image is the one being
* mapped and has a parent.
*/
- if (mapping && rbd_dev->parent_spec)
+ if (!depth && rbd_dev->parent_spec)
rbd_warn(rbd_dev,
"WARNING: kernel layering is EXPERIMENTAL!");
}
- ret = rbd_dev_probe_parent(rbd_dev);
+ ret = rbd_dev_probe_parent(rbd_dev, depth);
if (ret)
goto err_out_probe;
@@ -5401,7 +5375,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
err_out_probe:
rbd_dev_unprobe(rbd_dev);
err_out_watch:
- if (mapping)
+ if (!depth)
rbd_dev_header_unwatch_sync(rbd_dev);
out_header_name:
kfree(rbd_dev->header_name);
@@ -5464,7 +5438,7 @@ static ssize_t do_rbd_add(struct bus_type *bus,
spec = NULL; /* rbd_dev now owns this */
rbd_opts = NULL; /* rbd_dev now owns this */
- rc = rbd_dev_image_probe(rbd_dev, true);
+ rc = rbd_dev_image_probe(rbd_dev, 0);
if (rc < 0)
goto err_out_rbd_dev;
diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c
index ac8c62cb4..3163e4cdc 100644
--- a/drivers/block/rsxx/dev.c
+++ b/drivers/block/rsxx/dev.c
@@ -137,7 +137,10 @@ static void bio_dma_done_cb(struct rsxx_cardinfo *card,
if (!card->eeh_state && card->gendisk)
disk_stats_complete(card, meta->bio, meta->start_time);
- bio_endio(meta->bio, atomic_read(&meta->error) ? -EIO : 0);
+ if (atomic_read(&meta->error))
+ bio_io_error(meta->bio);
+ else
+ bio_endio(meta->bio);
kmem_cache_free(bio_meta_pool, meta);
}
}
@@ -148,6 +151,8 @@ static void rsxx_make_request(struct request_queue *q, struct bio *bio)
struct rsxx_bio_meta *bio_meta;
int st = -EINVAL;
+ blk_queue_split(q, &bio, q->bio_split);
+
might_sleep();
if (!card)
@@ -199,7 +204,9 @@ static void rsxx_make_request(struct request_queue *q, struct bio *bio)
queue_err:
kmem_cache_free(bio_meta_pool, bio_meta);
req_err:
- bio_endio(bio, st);
+ if (st)
+ bio->bi_error = st;
+ bio_endio(bio);
}
/*----------------- Device Setup -------------------*/
diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c
index 1e46eb230..586f9168f 100644
--- a/drivers/block/skd_main.c
+++ b/drivers/block/skd_main.c
@@ -4422,7 +4422,7 @@ static int skd_cons_disk(struct skd_device *skdev)
/* DISCARD Flag initialization. */
q->limits.discard_granularity = 8192;
q->limits.discard_alignment = 0;
- q->limits.max_discard_sectors = UINT_MAX >> 9;
+ blk_queue_max_discard_sectors(q, UINT_MAX >> 9);
q->limits.discard_zeroes_data = 1;
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
diff --git a/drivers/block/umem.c b/drivers/block/umem.c
index 4cf81b5bf..04d65790a 100644
--- a/drivers/block/umem.c
+++ b/drivers/block/umem.c
@@ -456,7 +456,7 @@ static void process_page(unsigned long data)
PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE);
if (control & DMASCR_HARD_ERROR) {
/* error */
- clear_bit(BIO_UPTODATE, &bio->bi_flags);
+ bio->bi_error = -EIO;
dev_printk(KERN_WARNING, &card->dev->dev,
"I/O error on sector %d/%d\n",
le32_to_cpu(desc->local_addr)>>9,
@@ -505,7 +505,7 @@ static void process_page(unsigned long data)
return_bio = bio->bi_next;
bio->bi_next = NULL;
- bio_endio(bio, 0);
+ bio_endio(bio);
}
}
@@ -531,6 +531,8 @@ static void mm_make_request(struct request_queue *q, struct bio *bio)
(unsigned long long)bio->bi_iter.bi_sector,
bio->bi_iter.bi_size);
+ blk_queue_split(q, &bio, q->bio_split);
+
spin_lock_irq(&card->lock);
*card->biotail = bio;
bio->bi_next = NULL;
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index d4d05f064..6ca35495a 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -144,7 +144,7 @@ static void virtblk_done(struct virtqueue *vq)
do {
virtqueue_disable_cb(vq);
while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) {
- blk_mq_complete_request(vbr->req);
+ blk_mq_complete_request(vbr->req, vbr->req->errors);
req_done = true;
}
if (unlikely(virtqueue_is_broken(vq)))
@@ -478,8 +478,7 @@ static int virtblk_get_cache_mode(struct virtio_device *vdev)
struct virtio_blk_config, wce,
&writeback);
if (err)
- writeback = virtio_has_feature(vdev, VIRTIO_BLK_F_WCE) ||
- virtio_has_feature(vdev, VIRTIO_F_VERSION_1);
+ writeback = virtio_has_feature(vdev, VIRTIO_BLK_F_WCE);
return writeback;
}
@@ -657,6 +656,7 @@ static int virtblk_probe(struct virtio_device *vdev)
vblk->disk->private_data = vblk;
vblk->disk->fops = &virtblk_fops;
vblk->disk->driverfs_dev = &vdev->dev;
+ vblk->disk->flags |= GENHD_FL_EXT_DEVT;
vblk->index = index;
/* configure queue flush support */
@@ -840,7 +840,7 @@ static unsigned int features_legacy[] = {
static unsigned int features[] = {
VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
- VIRTIO_BLK_F_TOPOLOGY,
+ VIRTIO_BLK_F_WCE, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
VIRTIO_BLK_F_MQ,
};
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 954c0029f..6a685aec6 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -1078,9 +1078,9 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
/*
* bio callback.
*/
-static void end_block_io_op(struct bio *bio, int error)
+static void end_block_io_op(struct bio *bio)
{
- __end_block_io_op(bio->bi_private, error);
+ __end_block_io_op(bio->bi_private, bio->bi_error);
bio_put(bio);
}
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 7a8a73f1f..a69c02dad 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -37,6 +37,7 @@
#include <linux/interrupt.h>
#include <linux/blkdev.h>
+#include <linux/blk-mq.h>
#include <linux/hdreg.h>
#include <linux/cdrom.h>
#include <linux/module.h>
@@ -82,7 +83,6 @@ struct blk_shadow {
struct split_bio {
struct bio *bio;
atomic_t pending;
- int err;
};
static DEFINE_MUTEX(blkfront_mutex);
@@ -148,6 +148,7 @@ struct blkfront_info
unsigned int feature_persistent:1;
unsigned int max_indirect_segments;
int is_ready;
+ struct blk_mq_tag_set tag_set;
};
static unsigned int nr_minors;
@@ -248,7 +249,7 @@ static struct grant *get_grant(grant_ref_t *gref_head,
struct blkfront_info *info)
{
struct grant *gnt_list_entry;
- unsigned long buffer_mfn;
+ unsigned long buffer_gfn;
BUG_ON(list_empty(&info->grants));
gnt_list_entry = list_first_entry(&info->grants, struct grant,
@@ -267,10 +268,10 @@ static struct grant *get_grant(grant_ref_t *gref_head,
BUG_ON(!pfn);
gnt_list_entry->pfn = pfn;
}
- buffer_mfn = pfn_to_mfn(gnt_list_entry->pfn);
+ buffer_gfn = pfn_to_gfn(gnt_list_entry->pfn);
gnttab_grant_foreign_access_ref(gnt_list_entry->gref,
info->xbdev->otherend_id,
- buffer_mfn, 0);
+ buffer_gfn, 0);
return gnt_list_entry;
}
@@ -617,54 +618,41 @@ static inline bool blkif_request_flush_invalid(struct request *req,
!(info->feature_flush & REQ_FUA)));
}
-/*
- * do_blkif_request
- * read a block; request is in a request queue
- */
-static void do_blkif_request(struct request_queue *rq)
+static int blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
+ const struct blk_mq_queue_data *qd)
{
- struct blkfront_info *info = NULL;
- struct request *req;
- int queued;
-
- pr_debug("Entered do_blkif_request\n");
+ struct blkfront_info *info = qd->rq->rq_disk->private_data;
- queued = 0;
-
- while ((req = blk_peek_request(rq)) != NULL) {
- info = req->rq_disk->private_data;
-
- if (RING_FULL(&info->ring))
- goto wait;
+ blk_mq_start_request(qd->rq);
+ spin_lock_irq(&info->io_lock);
+ if (RING_FULL(&info->ring))
+ goto out_busy;
- blk_start_request(req);
+ if (blkif_request_flush_invalid(qd->rq, info))
+ goto out_err;
- if (blkif_request_flush_invalid(req, info)) {
- __blk_end_request_all(req, -EOPNOTSUPP);
- continue;
- }
+ if (blkif_queue_request(qd->rq))
+ goto out_busy;
- pr_debug("do_blk_req %p: cmd %p, sec %lx, "
- "(%u/%u) [%s]\n",
- req, req->cmd, (unsigned long)blk_rq_pos(req),
- blk_rq_cur_sectors(req), blk_rq_sectors(req),
- rq_data_dir(req) ? "write" : "read");
-
- if (blkif_queue_request(req)) {
- blk_requeue_request(rq, req);
-wait:
- /* Avoid pointless unplugs. */
- blk_stop_queue(rq);
- break;
- }
+ flush_requests(info);
+ spin_unlock_irq(&info->io_lock);
+ return BLK_MQ_RQ_QUEUE_OK;
- queued++;
- }
+out_err:
+ spin_unlock_irq(&info->io_lock);
+ return BLK_MQ_RQ_QUEUE_ERROR;
- if (queued != 0)
- flush_requests(info);
+out_busy:
+ spin_unlock_irq(&info->io_lock);
+ blk_mq_stop_hw_queue(hctx);
+ return BLK_MQ_RQ_QUEUE_BUSY;
}
+static struct blk_mq_ops blkfront_mq_ops = {
+ .queue_rq = blkif_queue_rq,
+ .map_queue = blk_mq_map_queue,
+};
+
static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
unsigned int physical_sector_size,
unsigned int segments)
@@ -672,9 +660,22 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
struct request_queue *rq;
struct blkfront_info *info = gd->private_data;
- rq = blk_init_queue(do_blkif_request, &info->io_lock);
- if (rq == NULL)
+ memset(&info->tag_set, 0, sizeof(info->tag_set));
+ info->tag_set.ops = &blkfront_mq_ops;
+ info->tag_set.nr_hw_queues = 1;
+ info->tag_set.queue_depth = BLK_RING_SIZE(info);
+ info->tag_set.numa_node = NUMA_NO_NODE;
+ info->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
+ info->tag_set.cmd_size = 0;
+ info->tag_set.driver_data = info;
+
+ if (blk_mq_alloc_tag_set(&info->tag_set))
return -1;
+ rq = blk_mq_init_queue(&info->tag_set);
+ if (IS_ERR(rq)) {
+ blk_mq_free_tag_set(&info->tag_set);
+ return -1;
+ }
queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
@@ -902,19 +903,15 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
static void xlvbd_release_gendisk(struct blkfront_info *info)
{
unsigned int minor, nr_minors;
- unsigned long flags;
if (info->rq == NULL)
return;
- spin_lock_irqsave(&info->io_lock, flags);
-
/* No more blkif_request(). */
- blk_stop_queue(info->rq);
+ blk_mq_stop_hw_queues(info->rq);
/* No more gnttab callback work. */
gnttab_cancel_free_callback(&info->callback);
- spin_unlock_irqrestore(&info->io_lock, flags);
/* Flush gnttab callback work. Must be done with no locks held. */
flush_work(&info->work);
@@ -926,20 +923,18 @@ static void xlvbd_release_gendisk(struct blkfront_info *info)
xlbd_release_minors(minor, nr_minors);
blk_cleanup_queue(info->rq);
+ blk_mq_free_tag_set(&info->tag_set);
info->rq = NULL;
put_disk(info->gd);
info->gd = NULL;
}
+/* Must be called with io_lock held */
static void kick_pending_request_queues(struct blkfront_info *info)
{
- if (!RING_FULL(&info->ring)) {
- /* Re-enable calldowns. */
- blk_start_queue(info->rq);
- /* Kick things off immediately. */
- do_blkif_request(info->rq);
- }
+ if (!RING_FULL(&info->ring))
+ blk_mq_start_stopped_hw_queues(info->rq, true);
}
static void blkif_restart_queue(struct work_struct *work)
@@ -964,7 +959,7 @@ static void blkif_free(struct blkfront_info *info, int suspend)
BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
/* No more blkif_request(). */
if (info->rq)
- blk_stop_queue(info->rq);
+ blk_mq_stop_hw_queues(info->rq);
/* Remove all persistent grants */
if (!list_empty(&info->grants)) {
@@ -1201,7 +1196,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
queue_flag_clear(QUEUE_FLAG_SECDISCARD, rq);
}
- __blk_end_request_all(req, error);
+ blk_mq_complete_request(req, error);
break;
case BLKIF_OP_FLUSH_DISKCACHE:
case BLKIF_OP_WRITE_BARRIER:
@@ -1229,7 +1224,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
dev_dbg(&info->xbdev->dev, "Bad return from blkdev data "
"request: %x\n", bret->status);
- __blk_end_request_all(req, error);
+ blk_mq_complete_request(req, error);
break;
default:
BUG();
@@ -1481,16 +1476,14 @@ static int blkfront_probe(struct xenbus_device *dev,
return 0;
}
-static void split_bio_end(struct bio *bio, int error)
+static void split_bio_end(struct bio *bio)
{
struct split_bio *split_bio = bio->bi_private;
- if (error)
- split_bio->err = error;
-
if (atomic_dec_and_test(&split_bio->pending)) {
split_bio->bio->bi_phys_segments = 0;
- bio_endio(split_bio->bio, split_bio->err);
+ split_bio->bio->bi_error = bio->bi_error;
+ bio_endio(split_bio->bio);
kfree(split_bio);
}
bio_put(bio);
@@ -1558,28 +1551,6 @@ static int blkif_recover(struct blkfront_info *info)
kfree(copy);
- /*
- * Empty the queue, this is important because we might have
- * requests in the queue with more segments than what we
- * can handle now.
- */
- spin_lock_irq(&info->io_lock);
- while ((req = blk_fetch_request(info->rq)) != NULL) {
- if (req->cmd_flags &
- (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) {
- list_add(&req->queuelist, &requests);
- continue;
- }
- merge_bio.head = req->bio;
- merge_bio.tail = req->biotail;
- bio_list_merge(&bio_list, &merge_bio);
- req->bio = NULL;
- if (req->cmd_flags & (REQ_FLUSH | REQ_FUA))
- pr_alert("diskcache flush request found!\n");
- __blk_end_request_all(req, 0);
- }
- spin_unlock_irq(&info->io_lock);
-
xenbus_switch_state(info->xbdev, XenbusStateConnected);
spin_lock_irq(&info->io_lock);
@@ -1594,9 +1565,10 @@ static int blkif_recover(struct blkfront_info *info)
/* Requeue pending requests (flush or discard) */
list_del_init(&req->queuelist);
BUG_ON(req->nr_phys_segments > segs);
- blk_requeue_request(info->rq, req);
+ blk_mq_requeue_request(req);
}
spin_unlock_irq(&info->io_lock);
+ blk_mq_kick_requeue_list(info->rq);
while ((bio = bio_list_pop(&bio_list)) != NULL) {
/* Traverse the list of pending bios and re-queue them */
@@ -1984,7 +1956,8 @@ static void blkback_changed(struct xenbus_device *dev,
break;
/* Missed the backend's Closing state -- fallthrough */
case XenbusStateClosing:
- blkfront_closing(info);
+ if (info)
+ blkfront_closing(info);
break;
}
}
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 763301c78..9fa15bb9d 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -388,7 +388,6 @@ static ssize_t comp_algorithm_store(struct device *dev,
static ssize_t compact_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t len)
{
- unsigned long nr_migrated;
struct zram *zram = dev_to_zram(dev);
struct zram_meta *meta;
@@ -399,8 +398,7 @@ static ssize_t compact_store(struct device *dev,
}
meta = zram->meta;
- nr_migrated = zs_compact(meta->mem_pool);
- atomic64_add(nr_migrated, &zram->stats.num_migrated);
+ zs_compact(meta->mem_pool);
up_read(&zram->init_lock);
return len;
@@ -428,26 +426,31 @@ static ssize_t mm_stat_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct zram *zram = dev_to_zram(dev);
+ struct zs_pool_stats pool_stats;
u64 orig_size, mem_used = 0;
long max_used;
ssize_t ret;
+ memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats));
+
down_read(&zram->init_lock);
- if (init_done(zram))
+ if (init_done(zram)) {
mem_used = zs_get_total_pages(zram->meta->mem_pool);
+ zs_pool_stats(zram->meta->mem_pool, &pool_stats);
+ }
orig_size = atomic64_read(&zram->stats.pages_stored);
max_used = atomic_long_read(&zram->stats.max_used_pages);
ret = scnprintf(buf, PAGE_SIZE,
- "%8llu %8llu %8llu %8lu %8ld %8llu %8llu\n",
+ "%8llu %8llu %8llu %8lu %8ld %8llu %8lu\n",
orig_size << PAGE_SHIFT,
(u64)atomic64_read(&zram->stats.compr_data_size),
mem_used << PAGE_SHIFT,
zram->limit_pages << PAGE_SHIFT,
max_used << PAGE_SHIFT,
(u64)atomic64_read(&zram->stats.zero_pages),
- (u64)atomic64_read(&zram->stats.num_migrated));
+ pool_stats.pages_compacted);
up_read(&zram->init_lock);
return ret;
@@ -619,7 +622,7 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
uncmem = user_mem;
if (!uncmem) {
- pr_info("Unable to allocate temp memory\n");
+ pr_err("Unable to allocate temp memory\n");
ret = -ENOMEM;
goto out_cleanup;
}
@@ -716,7 +719,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
handle = zs_malloc(meta->mem_pool, clen);
if (!handle) {
- pr_info("Error allocating memory for compressed page: %u, size=%zu\n",
+ pr_err("Error allocating memory for compressed page: %u, size=%zu\n",
index, clen);
ret = -ENOMEM;
goto out;
@@ -848,7 +851,7 @@ static void __zram_make_request(struct zram *zram, struct bio *bio)
if (unlikely(bio->bi_rw & REQ_DISCARD)) {
zram_bio_discard(zram, index, offset, bio);
- bio_endio(bio, 0);
+ bio_endio(bio);
return;
}
@@ -881,8 +884,7 @@ static void __zram_make_request(struct zram *zram, struct bio *bio)
update_position(&index, &offset, &bvec);
}
- set_bit(BIO_UPTODATE, &bio->bi_flags);
- bio_endio(bio, 0);
+ bio_endio(bio);
return;
out:
@@ -899,6 +901,8 @@ static void zram_make_request(struct request_queue *queue, struct bio *bio)
if (unlikely(!zram_meta_get(zram)))
goto error;
+ blk_queue_split(queue, &bio, queue->bio_split);
+
if (!valid_io_request(zram, bio->bi_iter.bi_sector,
bio->bi_iter.bi_size)) {
atomic64_inc(&zram->stats.invalid_io);
@@ -1035,7 +1039,7 @@ static ssize_t disksize_store(struct device *dev,
comp = zcomp_create(zram->compressor, zram->max_comp_streams);
if (IS_ERR(comp)) {
- pr_info("Cannot initialise %s compressing backend\n",
+ pr_err("Cannot initialise %s compressing backend\n",
zram->compressor);
err = PTR_ERR(comp);
goto out_free_meta;
@@ -1213,7 +1217,7 @@ static int zram_add(void)
/* gendisk structure */
zram->disk = alloc_disk(1);
if (!zram->disk) {
- pr_warn("Error allocating disk structure for device %d\n",
+ pr_err("Error allocating disk structure for device %d\n",
device_id);
ret = -ENOMEM;
goto out_free_queue;
@@ -1242,7 +1246,7 @@ static int zram_add(void)
blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
- zram->disk->queue->limits.max_discard_sectors = UINT_MAX;
+ blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
/*
* zram_bio_discard() will clear all logical blocks if logical block
* size is identical to the physical block size (PAGE_SIZE). But if it is
@@ -1262,7 +1266,8 @@ static int zram_add(void)
ret = sysfs_create_group(&disk_to_dev(zram->disk)->kobj,
&zram_disk_attr_group);
if (ret < 0) {
- pr_warn("Error creating sysfs group");
+ pr_err("Error creating sysfs group for device %d\n",
+ device_id);
goto out_free_disk;
}
strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));
@@ -1402,13 +1407,13 @@ static int __init zram_init(void)
ret = class_register(&zram_control_class);
if (ret) {
- pr_warn("Unable to register zram-control class\n");
+ pr_err("Unable to register zram-control class\n");
return ret;
}
zram_major = register_blkdev(0, "zram");
if (zram_major <= 0) {
- pr_warn("Unable to get major number\n");
+ pr_err("Unable to get major number\n");
class_unregister(&zram_control_class);
return -EBUSY;
}
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index 6dbe2df50..8e9233968 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -78,7 +78,6 @@ struct zram_stats {
atomic64_t compr_data_size; /* compressed size of pages stored */
atomic64_t num_reads; /* failed + successful */
atomic64_t num_writes; /* --do-- */
- atomic64_t num_migrated; /* no. of migrated object */
atomic64_t failed_reads; /* can happen when memory is too low */
atomic64_t failed_writes; /* can happen when memory is too low */
atomic64_t invalid_io; /* non-page-aligned I/O requests */