summaryrefslogtreecommitdiff
path: root/drivers/block/rbd.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block/rbd.c')
-rw-r--r--drivers/block/rbd.c136
1 files changed, 55 insertions, 81 deletions
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 324bf35ec..128e7df5b 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -96,6 +96,8 @@ static int atomic_dec_return_safe(atomic_t *v)
#define RBD_MINORS_PER_MAJOR 256
#define RBD_SINGLE_MAJOR_PART_SHIFT 4
+#define RBD_MAX_PARENT_CHAIN_LEN 16
+
#define RBD_SNAP_DEV_NAME_PREFIX "snap_"
#define RBD_MAX_SNAP_NAME_LEN \
(NAME_MAX - (sizeof (RBD_SNAP_DEV_NAME_PREFIX) - 1))
@@ -426,7 +428,7 @@ static ssize_t rbd_add_single_major(struct bus_type *bus, const char *buf,
size_t count);
static ssize_t rbd_remove_single_major(struct bus_type *bus, const char *buf,
size_t count);
-static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping);
+static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth);
static void rbd_spec_put(struct rbd_spec *spec);
static int rbd_dev_id_to_minor(int dev_id)
@@ -1863,9 +1865,11 @@ static void rbd_osd_req_callback(struct ceph_osd_request *osd_req,
rbd_osd_read_callback(obj_request);
break;
case CEPH_OSD_OP_SETALLOCHINT:
- rbd_assert(osd_req->r_ops[1].op == CEPH_OSD_OP_WRITE);
+ rbd_assert(osd_req->r_ops[1].op == CEPH_OSD_OP_WRITE ||
+ osd_req->r_ops[1].op == CEPH_OSD_OP_WRITEFULL);
/* fall through */
case CEPH_OSD_OP_WRITE:
+ case CEPH_OSD_OP_WRITEFULL:
rbd_osd_write_callback(obj_request);
break;
case CEPH_OSD_OP_STAT:
@@ -2401,7 +2405,10 @@ static void rbd_img_obj_request_fill(struct rbd_obj_request *obj_request,
opcode = CEPH_OSD_OP_ZERO;
}
} else if (op_type == OBJ_OP_WRITE) {
- opcode = CEPH_OSD_OP_WRITE;
+ if (!offset && length == object_size)
+ opcode = CEPH_OSD_OP_WRITEFULL;
+ else
+ opcode = CEPH_OSD_OP_WRITE;
osd_req_op_alloc_hint_init(osd_request, num_ops,
object_size, object_size);
num_ops++;
@@ -3474,52 +3481,6 @@ static int rbd_queue_rq(struct blk_mq_hw_ctx *hctx,
return BLK_MQ_RQ_QUEUE_OK;
}
-/*
- * a queue callback. Makes sure that we don't create a bio that spans across
- * multiple osd objects. One exception would be with a single page bios,
- * which we handle later at bio_chain_clone_range()
- */
-static int rbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bmd,
- struct bio_vec *bvec)
-{
- struct rbd_device *rbd_dev = q->queuedata;
- sector_t sector_offset;
- sector_t sectors_per_obj;
- sector_t obj_sector_offset;
- int ret;
-
- /*
- * Find how far into its rbd object the partition-relative
- * bio start sector is to offset relative to the enclosing
- * device.
- */
- sector_offset = get_start_sect(bmd->bi_bdev) + bmd->bi_sector;
- sectors_per_obj = 1 << (rbd_dev->header.obj_order - SECTOR_SHIFT);
- obj_sector_offset = sector_offset & (sectors_per_obj - 1);
-
- /*
- * Compute the number of bytes from that offset to the end
- * of the object. Account for what's already used by the bio.
- */
- ret = (int) (sectors_per_obj - obj_sector_offset) << SECTOR_SHIFT;
- if (ret > bmd->bi_size)
- ret -= bmd->bi_size;
- else
- ret = 0;
-
- /*
- * Don't send back more than was asked for. And if the bio
- * was empty, let the whole thing through because: "Note
- * that a block device *must* allow a single page to be
- * added to an empty bio."
- */
- rbd_assert(bvec->bv_len <= PAGE_SIZE);
- if (ret > (int) bvec->bv_len || !bmd->bi_size)
- ret = (int) bvec->bv_len;
-
- return ret;
-}
-
static void rbd_free_disk(struct rbd_device *rbd_dev)
{
struct gendisk *disk = rbd_dev->disk;
@@ -3806,6 +3767,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
/* set io sizes to object size */
segment_size = rbd_obj_bytes(&rbd_dev->header);
blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE);
+ q->limits.max_sectors = queue_max_hw_sectors(q);
blk_queue_max_segments(q, segment_size / SECTOR_SIZE);
blk_queue_max_segment_size(q, segment_size);
blk_queue_io_min(q, segment_size);
@@ -3815,10 +3777,12 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
q->limits.discard_granularity = segment_size;
q->limits.discard_alignment = segment_size;
- q->limits.max_discard_sectors = segment_size / SECTOR_SIZE;
+ blk_queue_max_discard_sectors(q, segment_size / SECTOR_SIZE);
q->limits.discard_zeroes_data = 1;
- blk_queue_merge_bvec(q, rbd_merge_bvec);
+ if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC))
+ q->backing_dev_info.capabilities |= BDI_CAP_STABLE_WRITES;
+
disk->queue = q;
q->queuedata = rbd_dev;
@@ -4720,7 +4684,10 @@ static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev)
}
ret = rbd_dev_v2_snap_context(rbd_dev);
- dout("rbd_dev_v2_snap_context returned %d\n", ret);
+ if (ret && first_time) {
+ kfree(rbd_dev->header.object_prefix);
+ rbd_dev->header.object_prefix = NULL;
+ }
return ret;
}
@@ -5169,44 +5136,51 @@ out_err:
return ret;
}
-static int rbd_dev_probe_parent(struct rbd_device *rbd_dev)
+/*
+ * @depth is rbd_dev_image_probe() -> rbd_dev_probe_parent() ->
+ * rbd_dev_image_probe() recursion depth, which means it's also the
+ * length of the already discovered part of the parent chain.
+ */
+static int rbd_dev_probe_parent(struct rbd_device *rbd_dev, int depth)
{
struct rbd_device *parent = NULL;
- struct rbd_spec *parent_spec;
- struct rbd_client *rbdc;
int ret;
if (!rbd_dev->parent_spec)
return 0;
- /*
- * We need to pass a reference to the client and the parent
- * spec when creating the parent rbd_dev. Images related by
- * parent/child relationships always share both.
- */
- parent_spec = rbd_spec_get(rbd_dev->parent_spec);
- rbdc = __rbd_get_client(rbd_dev->rbd_client);
- ret = -ENOMEM;
- parent = rbd_dev_create(rbdc, parent_spec, NULL);
- if (!parent)
+ if (++depth > RBD_MAX_PARENT_CHAIN_LEN) {
+ pr_info("parent chain is too long (%d)\n", depth);
+ ret = -EINVAL;
goto out_err;
+ }
- ret = rbd_dev_image_probe(parent, false);
+ parent = rbd_dev_create(rbd_dev->rbd_client, rbd_dev->parent_spec,
+ NULL);
+ if (!parent) {
+ ret = -ENOMEM;
+ goto out_err;
+ }
+
+ /*
+ * Images related by parent/child relationships always share
+ * rbd_client and spec/parent_spec, so bump their refcounts.
+ */
+ __rbd_get_client(rbd_dev->rbd_client);
+ rbd_spec_get(rbd_dev->parent_spec);
+
+ ret = rbd_dev_image_probe(parent, depth);
if (ret < 0)
goto out_err;
+
rbd_dev->parent = parent;
atomic_set(&rbd_dev->parent_ref, 1);
-
return 0;
+
out_err:
- if (parent) {
- rbd_dev_unparent(rbd_dev);
+ rbd_dev_unparent(rbd_dev);
+ if (parent)
rbd_dev_destroy(parent);
- } else {
- rbd_put_client(rbdc);
- rbd_spec_put(parent_spec);
- }
-
return ret;
}
@@ -5324,7 +5298,7 @@ static void rbd_dev_image_release(struct rbd_device *rbd_dev)
* parent), initiate a watch on its header object before using that
* object to get detailed information about the rbd image.
*/
-static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
+static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
{
int ret;
@@ -5342,7 +5316,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
if (ret)
goto err_out_format;
- if (mapping) {
+ if (!depth) {
ret = rbd_dev_header_watch_sync(rbd_dev);
if (ret) {
if (ret == -ENOENT)
@@ -5363,7 +5337,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
* Otherwise this is a parent image, identified by pool, image
* and snap ids - need to fill in names for those ids.
*/
- if (mapping)
+ if (!depth)
ret = rbd_spec_fill_snap_id(rbd_dev);
else
ret = rbd_spec_fill_names(rbd_dev);
@@ -5385,12 +5359,12 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
* Need to warn users if this image is the one being
* mapped and has a parent.
*/
- if (mapping && rbd_dev->parent_spec)
+ if (!depth && rbd_dev->parent_spec)
rbd_warn(rbd_dev,
"WARNING: kernel layering is EXPERIMENTAL!");
}
- ret = rbd_dev_probe_parent(rbd_dev);
+ ret = rbd_dev_probe_parent(rbd_dev, depth);
if (ret)
goto err_out_probe;
@@ -5401,7 +5375,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
err_out_probe:
rbd_dev_unprobe(rbd_dev);
err_out_watch:
- if (mapping)
+ if (!depth)
rbd_dev_header_unwatch_sync(rbd_dev);
out_header_name:
kfree(rbd_dev->header_name);
@@ -5464,7 +5438,7 @@ static ssize_t do_rbd_add(struct bus_type *bus,
spec = NULL; /* rbd_dev now owns this */
rbd_opts = NULL; /* rbd_dev now owns this */
- rc = rbd_dev_image_probe(rbd_dev, true);
+ rc = rbd_dev_image_probe(rbd_dev, 0);
if (rc < 0)
goto err_out_rbd_dev;