summary refs log tree commit diff
path: root/drivers/infiniband/ulp/srp/ib_srp.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/ulp/srp/ib_srp.c')
-rw-r--r-- drivers/infiniband/ulp/srp/ib_srp.c 226
1 files changed, 173 insertions, 53 deletions
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 845ce90c2..3322ed750 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -70,6 +70,7 @@ static unsigned int indirect_sg_entries;
static bool allow_ext_sg;
static bool prefer_fr = true;
static bool register_always = true;
+static bool never_register;
static int topspin_workarounds = 1;
module_param(srp_sg_tablesize, uint, 0444);
@@ -81,7 +82,7 @@ MODULE_PARM_DESC(cmd_sg_entries,
module_param(indirect_sg_entries, uint, 0444);
MODULE_PARM_DESC(indirect_sg_entries,
- "Default max number of gather/scatter entries (default is 12, max is " __stringify(SCSI_MAX_SG_CHAIN_SEGMENTS) ")");
+ "Default max number of gather/scatter entries (default is 12, max is " __stringify(SG_MAX_SEGMENTS) ")");
module_param(allow_ext_sg, bool, 0444);
MODULE_PARM_DESC(allow_ext_sg,
@@ -99,6 +100,9 @@ module_param(register_always, bool, 0444);
MODULE_PARM_DESC(register_always,
"Use memory registration even for contiguous memory regions");
+module_param(never_register, bool, 0444);
+MODULE_PARM_DESC(never_register, "Never register memory");
+
static const struct kernel_param_ops srp_tmo_ops;
static int srp_reconnect_delay = 10;
@@ -316,7 +320,7 @@ static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target)
struct ib_fmr_pool_param fmr_param;
memset(&fmr_param, 0, sizeof(fmr_param));
- fmr_param.pool_size = target->scsi_host->can_queue;
+ fmr_param.pool_size = target->mr_pool_size;
fmr_param.dirty_watermark = fmr_param.pool_size / 4;
fmr_param.cache = 1;
fmr_param.max_pages_per_fmr = dev->max_pages_per_mr;
@@ -441,8 +445,7 @@ static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
{
struct srp_device *dev = target->srp_host->srp_dev;
- return srp_create_fr_pool(dev->dev, dev->pd,
- target->scsi_host->can_queue,
+ return srp_create_fr_pool(dev->dev, dev->pd, target->mr_pool_size,
dev->max_pages_per_mr);
}
@@ -469,7 +472,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
struct ib_qp *qp;
struct ib_fmr_pool *fmr_pool = NULL;
struct srp_fr_pool *fr_pool = NULL;
- const int m = dev->use_fast_reg ? 3 : 1;
+ const int m = 1 + dev->use_fast_reg * target->mr_per_cmd * 2;
int ret;
init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
@@ -850,7 +853,7 @@ static int srp_alloc_req_data(struct srp_rdma_ch *ch)
for (i = 0; i < target->req_ring_size; ++i) {
req = &ch->req_ring[i];
- mr_list = kmalloc(target->cmd_sg_cnt * sizeof(void *),
+ mr_list = kmalloc(target->mr_per_cmd * sizeof(void *),
GFP_KERNEL);
if (!mr_list)
goto out;
@@ -1112,7 +1115,7 @@ static struct scsi_cmnd *srp_claim_req(struct srp_rdma_ch *ch,
}
/**
- * srp_free_req() - Unmap data and add request to the free request list.
+ * srp_free_req() - Unmap data and adjust ch->req_lim.
* @ch: SRP RDMA channel.
* @req: Request to be freed.
* @scmnd: SCSI command associated with @req.
@@ -1299,9 +1302,16 @@ static void srp_reg_mr_err_done(struct ib_cq *cq, struct ib_wc *wc)
srp_handle_qp_err(cq, wc, "FAST REG");
}
+/*
+ * Map up to sg_nents elements of state->sg, starting at byte offset
+ * *sg_offset_p within the first element (offset 0 if sg_offset_p == NULL).
+ * If sg_offset_p != NULL then *sg_offset_p is updated to the offset in
+ * state->sg[retval] of the first byte that has not yet been mapped.
+ */
static int srp_map_finish_fr(struct srp_map_state *state,
struct srp_request *req,
- struct srp_rdma_ch *ch, int sg_nents)
+ struct srp_rdma_ch *ch, int sg_nents,
+ unsigned int *sg_offset_p)
{
struct srp_target_port *target = ch->target;
struct srp_device *dev = target->srp_host->srp_dev;
@@ -1316,13 +1326,14 @@ static int srp_map_finish_fr(struct srp_map_state *state,
WARN_ON_ONCE(!dev->use_fast_reg);
- if (sg_nents == 0)
- return 0;
-
if (sg_nents == 1 && target->global_mr) {
- srp_map_desc(state, sg_dma_address(state->sg),
- sg_dma_len(state->sg),
+ unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
+
+ srp_map_desc(state, sg_dma_address(state->sg) + sg_offset,
+ sg_dma_len(state->sg) - sg_offset,
target->global_mr->rkey);
+ if (sg_offset_p)
+ *sg_offset_p = 0;
return 1;
}
@@ -1333,9 +1344,17 @@ static int srp_map_finish_fr(struct srp_map_state *state,
rkey = ib_inc_rkey(desc->mr->rkey);
ib_update_fast_reg_key(desc->mr, rkey);
- n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, dev->mr_page_size);
- if (unlikely(n < 0))
+ n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, sg_offset_p,
+ dev->mr_page_size);
+ if (unlikely(n < 0)) {
+ srp_fr_pool_put(ch->fr_pool, &desc, 1);
+ pr_debug("%s: ib_map_mr_sg(%d, %d) returned %d.\n",
+ dev_name(&req->scmnd->device->sdev_gendev), sg_nents,
+ sg_offset_p ? *sg_offset_p : -1, n);
return n;
+ }
+
+ WARN_ON_ONCE(desc->mr->length == 0);
req->reg_cqe.done = srp_reg_mr_err_done;
@@ -1357,8 +1376,10 @@ static int srp_map_finish_fr(struct srp_map_state *state,
desc->mr->length, desc->mr->rkey);
err = ib_post_send(ch->qp, &wr.wr, &bad_wr);
- if (unlikely(err))
+ if (unlikely(err)) {
+ WARN_ON_ONCE(err == -ENOMEM);
return err;
+ }
return n;
}
@@ -1398,7 +1419,7 @@ static int srp_map_sg_entry(struct srp_map_state *state,
/*
* If the last entry of the MR wasn't a full page, then we need to
* close it out and start a new one -- we can only merge at page
- * boundries.
+ * boundaries.
*/
ret = 0;
if (len != dev->mr_page_size)
@@ -1413,10 +1434,9 @@ static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch,
struct scatterlist *sg;
int i, ret;
- state->desc = req->indirect_desc;
state->pages = req->map_page;
state->fmr.next = req->fmr_list;
- state->fmr.end = req->fmr_list + ch->target->cmd_sg_cnt;
+ state->fmr.end = req->fmr_list + ch->target->mr_per_cmd;
for_each_sg(scat, sg, count, i) {
ret = srp_map_sg_entry(state, ch, sg, i);
@@ -1428,8 +1448,6 @@ static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch,
if (ret)
return ret;
- req->nmdesc = state->nmdesc;
-
return 0;
}
@@ -1437,15 +1455,19 @@ static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
struct srp_request *req, struct scatterlist *scat,
int count)
{
- state->desc = req->indirect_desc;
+ unsigned int sg_offset = 0;
+
state->fr.next = req->fr_list;
- state->fr.end = req->fr_list + ch->target->cmd_sg_cnt;
+ state->fr.end = req->fr_list + ch->target->mr_per_cmd;
state->sg = scat;
+ if (count == 0)
+ return 0;
+
while (count) {
int i, n;
- n = srp_map_finish_fr(state, req, ch, count);
+ n = srp_map_finish_fr(state, req, ch, count, &sg_offset);
if (unlikely(n < 0))
return n;
@@ -1454,8 +1476,6 @@ static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
state->sg = sg_next(state->sg);
}
- req->nmdesc = state->nmdesc;
-
return 0;
}
@@ -1468,15 +1488,12 @@ static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch,
struct scatterlist *sg;
int i;
- state->desc = req->indirect_desc;
for_each_sg(scat, sg, count, i) {
srp_map_desc(state, ib_sg_dma_address(dev->dev, sg),
ib_sg_dma_len(dev->dev, sg),
target->global_mr->rkey);
}
- req->nmdesc = state->nmdesc;
-
return 0;
}
@@ -1514,9 +1531,10 @@ static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
#ifdef CONFIG_NEED_SG_DMA_LENGTH
idb_sg->dma_length = idb_sg->length; /* hack^2 */
#endif
- ret = srp_map_finish_fr(&state, req, ch, 1);
+ ret = srp_map_finish_fr(&state, req, ch, 1, NULL);
if (ret < 0)
return ret;
+ WARN_ON_ONCE(ret < 1);
} else if (dev->use_fmr) {
state.pages = idb_pages;
state.pages[0] = (req->indirect_dma_addr &
@@ -1534,6 +1552,41 @@ static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
return 0;
}
+#if defined(DYNAMIC_DEBUG)	/* same guard as the call site in srp_map_data() */
+/*
+ * srp_check_mapping() - debug-only consistency check of a mapped request
+ * @state: mapping state; ndesc and nmdesc are read as the number of
+ *	   descriptors and the number of memory registrations to sum up
+ * @ch:    SRP RDMA channel; only used to reach the srp_device
+ * @req:   SRP request whose indirect_desc[], fr_list[] and scmnd are examined
+ * @scat:  scatterlist of the command (currently unused)
+ * @count: number of scatterlist entries (currently unused)
+ *
+ * Sums the lengths of the first @state->ndesc indirect descriptors and the
+ * lengths covered by the first @state->nmdesc memory registrations, and logs
+ * an error if the descriptor total differs from scsi_bufflen() or if the
+ * registered total exceeds it. Nothing is modified and nothing is returned.
+ */
+static void srp_check_mapping(struct srp_map_state *state,
+			      struct srp_rdma_ch *ch, struct srp_request *req,
+			      struct scatterlist *scat, int count)
+{
+	struct srp_device *dev = ch->target->srp_host->srp_dev;
+	struct srp_fr_desc **pfr;
+	u64 desc_len = 0, mr_len = 0;
+	int i;
+
+	for (i = 0; i < state->ndesc; i++)
+		desc_len += be32_to_cpu(req->indirect_desc[i].len);
+	if (dev->use_fast_reg) {
+		/* Fast registration: each fr_list entry carries its MR length. */
+		for (i = 0, pfr = req->fr_list; i < state->nmdesc; i++, pfr++)
+			mr_len += (*pfr)->mr->length;
+	} else if (dev->use_fmr) {
+		/* FMR: the registered length is taken from the descriptors. */
+		for (i = 0; i < state->nmdesc; i++)
+			mr_len += be32_to_cpu(req->indirect_desc[i].len);
+	}
+	/* desc_len and mr_len are u64 and scsi_bufflen() is unsigned. */
+	if (desc_len != scsi_bufflen(req->scmnd) ||
+	    mr_len > scsi_bufflen(req->scmnd))
+		pr_err("Inconsistent: scsi len %u <> desc len %llu <> mr len %llu; ndesc %d; nmdesc = %d\n",
+		       scsi_bufflen(req->scmnd), desc_len, mr_len,
+		       state->ndesc, state->nmdesc);
+}
+#endif /* DYNAMIC_DEBUG */
+
+/**
+ * srp_map_data() - map SCSI data buffer onto an SRP request
+ * @scmnd: SCSI command to map
+ * @ch: SRP RDMA channel
+ * @req: SRP request
+ *
+ * Returns the length in bytes of the SRP_CMD IU or a negative value if
+ * mapping failed.
+ */
static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
struct srp_request *req)
{
@@ -1600,12 +1653,25 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
target->indirect_size, DMA_TO_DEVICE);
memset(&state, 0, sizeof(state));
+ state.desc = req->indirect_desc;
if (dev->use_fast_reg)
- srp_map_sg_fr(&state, ch, req, scat, count);
+ ret = srp_map_sg_fr(&state, ch, req, scat, count);
else if (dev->use_fmr)
- srp_map_sg_fmr(&state, ch, req, scat, count);
+ ret = srp_map_sg_fmr(&state, ch, req, scat, count);
else
- srp_map_sg_dma(&state, ch, req, scat, count);
+ ret = srp_map_sg_dma(&state, ch, req, scat, count);
+ req->nmdesc = state.nmdesc;
+ if (ret < 0)
+ goto unmap;
+
+#if defined(DYNAMIC_DEBUG)
+ {
+ DEFINE_DYNAMIC_DEBUG_METADATA(ddm,
+ "Memory mapping consistency check");
+ if (unlikely(ddm.flags & _DPRINTK_FLAGS_PRINT))
+ srp_check_mapping(&state, ch, req, scat, count);
+ }
+#endif
/* We've mapped the request, now pull as much of the indirect
* descriptor table as we can into the command buffer. If this
@@ -1628,7 +1694,8 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
!target->allow_ext_sg)) {
shost_printk(KERN_ERR, target->scsi_host,
"Could not fit S/G list into SRP_CMD\n");
- return -EIO;
+ ret = -EIO;
+ goto unmap;
}
count = min(state.ndesc, target->cmd_sg_cnt);
@@ -1646,7 +1713,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
ret = srp_map_idb(ch, req, state.gen.next, state.gen.end,
idb_len, &idb_rkey);
if (ret < 0)
- return ret;
+ goto unmap;
req->nmdesc++;
} else {
idb_rkey = cpu_to_be32(target->global_mr->rkey);
@@ -1672,6 +1739,12 @@ map_complete:
cmd->buf_fmt = fmt;
return len;
+
+unmap:
+ srp_unmap_data(scmnd, ch, req);
+ if (ret == -ENOMEM && req->nmdesc >= target->mr_pool_size)
+ ret = -E2BIG;
+ return ret;
}
/*
@@ -2564,6 +2637,20 @@ static int srp_reset_host(struct scsi_cmnd *scmnd)
return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED;
}
+/*
+ * srp_slave_alloc() - per-device setup hook for the SRP SCSI host
+ * @sdev: SCSI device being allocated
+ *
+ * If the HCA does not advertise IB_DEVICE_SG_GAPS_REG, set the request
+ * queue's virt boundary to the complement of the device's mr_page_mask.
+ * Always returns 0.
+ */
+static int srp_slave_alloc(struct scsi_device *sdev)
+{
+	struct srp_target_port *target = host_to_target(sdev->host);
+	struct srp_device *srp_dev = target->srp_host->srp_dev;
+
+	/* Nothing to restrict when the HCA advertises SG_GAPS_REG. */
+	if (srp_dev->dev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)
+		return 0;
+
+	blk_queue_virt_boundary(sdev->request_queue, ~srp_dev->mr_page_mask);
+
+	return 0;
+}
+
static int srp_slave_configure(struct scsi_device *sdev)
{
struct Scsi_Host *shost = sdev->host;
@@ -2755,6 +2842,7 @@ static struct scsi_host_template srp_template = {
.module = THIS_MODULE,
.name = "InfiniBand SRP initiator",
.proc_name = DRV_NAME,
+ .slave_alloc = srp_slave_alloc,
.slave_configure = srp_slave_configure,
.info = srp_target_info,
.queuecommand = srp_queuecommand,
@@ -2819,7 +2907,7 @@ static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
spin_unlock(&host->target_lock);
scsi_scan_target(&target->scsi_host->shost_gendev,
- 0, target->scsi_id, SCAN_WILD_CARD, 0);
+ 0, target->scsi_id, SCAN_WILD_CARD, SCSI_SCAN_INITIAL);
if (srp_connected_ch(target) < target->ch_count ||
target->qp_in_error) {
@@ -2829,7 +2917,7 @@ static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
goto out;
}
- pr_debug(PFX "%s: SCSI scan succeeded - detected %d LUNs\n",
+ pr_debug("%s: SCSI scan succeeded - detected %d LUNs\n",
dev_name(&target->scsi_host->shost_gendev),
srp_sdev_count(target->scsi_host));
@@ -3097,7 +3185,7 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
case SRP_OPT_SG_TABLESIZE:
if (match_int(args, &token) || token < 1 ||
- token > SCSI_MAX_SG_CHAIN_SEGMENTS) {
+ token > SG_MAX_SEGMENTS) {
pr_warn("bad max sg_tablesize parameter '%s'\n",
p);
goto out;
@@ -3161,6 +3249,7 @@ static ssize_t srp_create_target(struct device *dev,
struct srp_device *srp_dev = host->srp_dev;
struct ib_device *ibdev = srp_dev->dev;
int ret, node_idx, node, cpu, i;
+ unsigned int max_sectors_per_mr, mr_per_cmd = 0;
bool multich = false;
target_host = scsi_host_alloc(&srp_template,
@@ -3217,7 +3306,33 @@ static ssize_t srp_create_target(struct device *dev,
target->sg_tablesize = target->cmd_sg_cnt;
}
+ if (srp_dev->use_fast_reg || srp_dev->use_fmr) {
+ /*
+ * FR and FMR can only map one HCA page per entry. If the
+ * start address is not aligned on a HCA page boundary two
+ * entries will be used for the head and the tail although
+ * these two entries combined contain at most one HCA page of
+ * data. Hence the "+ 1" in the calculation below.
+ *
+ * The indirect data buffer descriptor is contiguous so the
+ * memory for that buffer will only be registered if
+ * register_always is true. Hence add one to mr_per_cmd if
+ * register_always has been set.
+ */
+ max_sectors_per_mr = srp_dev->max_pages_per_mr <<
+ (ilog2(srp_dev->mr_page_size) - 9);
+ mr_per_cmd = register_always +
+ (target->scsi_host->max_sectors + 1 +
+ max_sectors_per_mr - 1) / max_sectors_per_mr;
+ pr_debug("max_sectors = %u; max_pages_per_mr = %u; mr_page_size = %u; max_sectors_per_mr = %u; mr_per_cmd = %u\n",
+ target->scsi_host->max_sectors,
+ srp_dev->max_pages_per_mr, srp_dev->mr_page_size,
+ max_sectors_per_mr, mr_per_cmd);
+ }
+
target_host->sg_tablesize = target->sg_tablesize;
+ target->mr_pool_size = target->scsi_host->can_queue * mr_per_cmd;
+ target->mr_per_cmd = mr_per_cmd;
target->indirect_size = target->sg_tablesize *
sizeof (struct srp_direct_buf);
target->max_iu_len = sizeof (struct srp_cmd) +
@@ -3410,21 +3525,10 @@ static void srp_add_one(struct ib_device *device)
int mr_page_shift, p;
u64 max_pages_per_mr;
- srp_dev = kmalloc(sizeof *srp_dev, GFP_KERNEL);
+ srp_dev = kzalloc(sizeof(*srp_dev), GFP_KERNEL);
if (!srp_dev)
return;
- srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
- device->map_phys_fmr && device->unmap_fmr);
- srp_dev->has_fr = (device->attrs.device_cap_flags &
- IB_DEVICE_MEM_MGT_EXTENSIONS);
- if (!srp_dev->has_fmr && !srp_dev->has_fr)
- dev_warn(&device->dev, "neither FMR nor FR is supported\n");
-
- srp_dev->use_fast_reg = (srp_dev->has_fr &&
- (!srp_dev->has_fmr || prefer_fr));
- srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr;
-
/*
* Use the smallest page size supported by the HCA, down to a
* minimum of 4096 bytes. We're unlikely to build large sglists
@@ -3435,8 +3539,25 @@ static void srp_add_one(struct ib_device *device)
srp_dev->mr_page_mask = ~((u64) srp_dev->mr_page_size - 1);
max_pages_per_mr = device->attrs.max_mr_size;
do_div(max_pages_per_mr, srp_dev->mr_page_size);
+ pr_debug("%s: %llu / %u = %llu <> %u\n", __func__,
+ device->attrs.max_mr_size, srp_dev->mr_page_size,
+ max_pages_per_mr, SRP_MAX_PAGES_PER_MR);
srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
max_pages_per_mr);
+
+ srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
+ device->map_phys_fmr && device->unmap_fmr);
+ srp_dev->has_fr = (device->attrs.device_cap_flags &
+ IB_DEVICE_MEM_MGT_EXTENSIONS);
+ if (!never_register && !srp_dev->has_fmr && !srp_dev->has_fr) {
+ dev_warn(&device->dev, "neither FMR nor FR is supported\n");
+ } else if (!never_register &&
+ device->attrs.max_mr_size >= 2 * srp_dev->mr_page_size) {
+ srp_dev->use_fast_reg = (srp_dev->has_fr &&
+ (!srp_dev->has_fmr || prefer_fr));
+ srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr;
+ }
+
if (srp_dev->use_fast_reg) {
srp_dev->max_pages_per_mr =
min_t(u32, srp_dev->max_pages_per_mr,
@@ -3456,15 +3577,14 @@ static void srp_add_one(struct ib_device *device)
if (IS_ERR(srp_dev->pd))
goto free_dev;
- if (!register_always || (!srp_dev->has_fmr && !srp_dev->has_fr)) {
+ if (never_register || !register_always ||
+ (!srp_dev->has_fmr && !srp_dev->has_fr)) {
srp_dev->global_mr = ib_get_dma_mr(srp_dev->pd,
IB_ACCESS_LOCAL_WRITE |
IB_ACCESS_REMOTE_READ |
IB_ACCESS_REMOTE_WRITE);
if (IS_ERR(srp_dev->global_mr))
goto err_pd;
- } else {
- srp_dev->global_mr = NULL;
}
for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) {