diff options
author | André Fabian Silva Delgado <emulatorman@parabola.nu> | 2016-03-25 03:53:42 -0300 |
---|---|---|
committer | André Fabian Silva Delgado <emulatorman@parabola.nu> | 2016-03-25 03:53:42 -0300 |
commit | 03dd4cb26d967f9588437b0fc9cc0e8353322bb7 (patch) | |
tree | fa581f6dc1c0596391690d1f67eceef3af8246dc /drivers/infiniband/ulp/iser | |
parent | d4e493caf788ef44982e131ff9c786546904d934 (diff) |
Linux-libre 4.5-gnu
Diffstat (limited to 'drivers/infiniband/ulp/iser')
-rw-r--r-- | drivers/infiniband/ulp/iser/iscsi_iser.c | 13 | ||||
-rw-r--r-- | drivers/infiniband/ulp/iser/iscsi_iser.h | 156 | ||||
-rw-r--r-- | drivers/infiniband/ulp/iser/iser_initiator.c | 323 | ||||
-rw-r--r-- | drivers/infiniband/ulp/iser/iser_memory.c | 179 | ||||
-rw-r--r-- | drivers/infiniband/ulp/iser/iser_verbs.c | 337 |
5 files changed, 463 insertions, 545 deletions
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 9080161e0..c827c93f4 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -644,7 +644,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, ib_conn = &iser_conn->ib_conn; if (ib_conn->pi_support) { - u32 sig_caps = ib_conn->device->dev_attr.sig_prot_cap; + u32 sig_caps = ib_conn->device->ib_device->attrs.sig_prot_cap; scsi_host_set_prot(shost, iser_dif_prot_caps(sig_caps)); scsi_host_set_guard(shost, SHOST_DIX_GUARD_IP | @@ -656,7 +656,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, * max fastreg page list length. */ shost->sg_tablesize = min_t(unsigned short, shost->sg_tablesize, - ib_conn->device->dev_attr.max_fast_reg_page_list_len); + ib_conn->device->ib_device->attrs.max_fast_reg_page_list_len); shost->max_sectors = min_t(unsigned int, 1024, (shost->sg_tablesize * PAGE_SIZE) >> 9); @@ -1059,7 +1059,8 @@ static int __init iser_init(void) release_wq = alloc_workqueue("release workqueue", 0, 0); if (!release_wq) { iser_err("failed to allocate release workqueue\n"); - return -ENOMEM; + err = -ENOMEM; + goto err_alloc_wq; } iscsi_iser_scsi_transport = iscsi_register_transport( @@ -1067,12 +1068,14 @@ static int __init iser_init(void) if (!iscsi_iser_scsi_transport) { iser_err("iscsi_register_transport failed\n"); err = -EINVAL; - goto register_transport_failure; + goto err_reg; } return 0; -register_transport_failure: +err_reg: + destroy_workqueue(release_wq); +err_alloc_wq: kmem_cache_destroy(ig.desc_cache); return err; diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 8a5998e6a..95f0a64e0 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -48,6 +48,7 @@ #include <scsi/scsi_transport_iscsi.h> #include <scsi/scsi_cmnd.h> #include <scsi/scsi_device.h> +#include <scsi/iser.h> #include <linux/interrupt.h> #include <linux/wait.h> @@ -151,46 +152,10 @@ - ISER_MAX_RX_MISC_PDUS) / \ (1 + ISER_INFLIGHT_DATAOUTS)) -#define ISER_WC_BATCH_COUNT 16 #define ISER_SIGNAL_CMD_COUNT 32 -#define ISER_VER 0x10 -#define ISER_WSV 0x08 -#define ISER_RSV 0x04 - -#define ISER_FASTREG_LI_WRID 0xffffffffffffffffULL -#define ISER_BEACON_WRID 0xfffffffffffffffeULL - -/** - * struct iser_hdr - iSER header - * - * @flags: flags support (zbva, remote_inv) - * @rsvd: reserved - * @write_stag: write rkey - * @write_va: write virtual address - * @reaf_stag: read rkey - * @read_va: read virtual address - */ -struct iser_hdr { - u8 flags; - u8 rsvd[3]; - __be32 write_stag; - __be64 write_va; - __be32 read_stag; - __be64 read_va; -} __attribute__((packed)); - - -#define ISER_ZBVA_NOT_SUPPORTED 0x80 -#define ISER_SEND_W_INV_NOT_SUPPORTED 0x40 - -struct iser_cm_hdr { - u8 flags; - u8 rsvd[3]; -} __packed; - /* Constant PDU lengths calculations */ -#define ISER_HEADERS_LEN (sizeof(struct iser_hdr) + sizeof(struct iscsi_hdr)) +#define ISER_HEADERS_LEN (sizeof(struct iser_ctrl) + sizeof(struct iscsi_hdr)) #define ISER_RECV_DATA_SEG_LEN 128 #define ISER_RX_PAYLOAD_SIZE (ISER_HEADERS_LEN + ISER_RECV_DATA_SEG_LEN) @@ -269,7 +234,7 @@ enum iser_desc_type { #define ISER_MAX_WRS 7 /** - * struct iser_tx_desc - iSER TX descriptor (for send wr_id) + * struct iser_tx_desc - iSER TX descriptor * * @iser_header: iser header * @iscsi_header: iscsi header @@ -287,12 +252,13 @@ enum iser_desc_type { * @sig_attrs: Signature attributes */ struct iser_tx_desc { - struct iser_hdr iser_header; + struct iser_ctrl iser_header; struct iscsi_hdr iscsi_header; enum iser_desc_type type; u64 dma_addr; struct ib_sge tx_sg[2]; int num_sge; + struct ib_cqe cqe; bool mapped; u8 wr_idx; union iser_wr { @@ -306,9 +272,10 @@ struct iser_tx_desc { }; #define ISER_RX_PAD_SIZE (256 - (ISER_RX_PAYLOAD_SIZE + \ - sizeof(u64) + sizeof(struct ib_sge))) + sizeof(u64) + sizeof(struct ib_sge) + \ + sizeof(struct ib_cqe))) /** - * struct iser_rx_desc - iSER RX descriptor (for recv wr_id) + * struct iser_rx_desc - iSER RX descriptor * * @iser_header: iser header * @iscsi_header: iscsi header @@ -318,12 +285,32 @@ struct iser_tx_desc { * @pad: for sense data TODO: Modify to maximum sense length supported */ struct iser_rx_desc { - struct iser_hdr iser_header; + struct iser_ctrl iser_header; struct iscsi_hdr iscsi_header; char data[ISER_RECV_DATA_SEG_LEN]; u64 dma_addr; struct ib_sge rx_sg; + struct ib_cqe cqe; char pad[ISER_RX_PAD_SIZE]; +} __packed; + +/** + * struct iser_login_desc - iSER login descriptor + * + * @req: pointer to login request buffer + * @resp: pointer to login response buffer + * @req_dma: DMA address of login request buffer + * @rsp_dma: DMA address of login response buffer + * @sge: IB sge for login post recv + * @cqe: completion handler + */ +struct iser_login_desc { + void *req; + void *rsp; + u64 req_dma; + u64 rsp_dma; + struct ib_sge sge; + struct ib_cqe cqe; } __attribute__((packed)); struct iser_conn; @@ -333,18 +320,12 @@ struct iscsi_iser_task; /** * struct iser_comp - iSER completion context * - * @device: pointer to device handle * @cq: completion queue - * @wcs: work completion array - * @tasklet: Tasklet handle * @active_qps: Number of active QPs attached * to completion context */ struct iser_comp { - struct iser_device *device; struct ib_cq *cq; - struct ib_wc wcs[ISER_WC_BATCH_COUNT]; - struct tasklet_struct tasklet; int active_qps; }; @@ -380,7 +361,6 @@ struct iser_reg_ops { * * @ib_device: RDMA device * @pd: Protection Domain for this device - * @dev_attr: Device attributes container * @mr: Global DMA memory region * @event_handler: IB events handle routine * @ig_list: entry in devices list @@ -389,18 +369,19 @@ struct iser_reg_ops { * cpus and device max completion vectors * @comps: Dinamically allocated array of completion handlers * @reg_ops: Registration ops + * @remote_inv_sup: Remote invalidate is supported on this device */ struct iser_device { struct ib_device *ib_device; struct ib_pd *pd; - struct ib_device_attr dev_attr; struct ib_mr *mr; struct ib_event_handler event_handler; struct list_head ig_list; int refcount; int comps_used; struct iser_comp *comps; - struct iser_reg_ops *reg_ops; + const struct iser_reg_ops *reg_ops; + bool remote_inv_sup; }; #define ISER_CHECK_GUARD 0xc0 @@ -475,10 +456,11 @@ struct iser_fr_pool { * @rx_wr: receive work request for batch posts * @device: reference to iser device * @comp: iser completion context - * @pi_support: Indicate device T10-PI support - * @beacon: beacon send wr to signal all flush errors were drained - * @flush_comp: completes when all connection completions consumed * @fr_pool: connection fast registration poool + * @pi_support: Indicate device T10-PI support + * @last: last send wr to signal all flush errors were drained + * @last_cqe: cqe handler for last wr + * @last_comp: completes when all connection completions consumed */ struct ib_conn { struct rdma_cm_id *cma_id; @@ -488,10 +470,12 @@ struct ib_conn { struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX]; struct iser_device *device; struct iser_comp *comp; - bool pi_support; - struct ib_send_wr beacon; - struct completion flush_comp; struct iser_fr_pool fr_pool; + bool pi_support; + struct ib_send_wr last; + struct ib_cqe last_cqe; + struct ib_cqe reg_cqe; + struct completion last_comp; }; /** @@ -514,11 +498,7 @@ struct ib_conn { * @up_completion: connection establishment completed * (state is ISER_CONN_UP) * @conn_list: entry in ig conn list - * @login_buf: login data buffer (stores login parameters) - * @login_req_buf: login request buffer - * @login_req_dma: login request buffer dma address - * @login_resp_buf: login response buffer - * @login_resp_dma: login response buffer dma address + * @login_desc: login descriptor * @rx_desc_head: head of rx_descs cyclic buffer * @rx_descs: rx buffers array (cyclic buffer) * @num_rx_descs: number of rx descriptors @@ -541,15 +521,13 @@ struct iser_conn { struct completion ib_completion; struct completion up_completion; struct list_head conn_list; - - char *login_buf; - char *login_req_buf, *login_resp_buf; - u64 login_req_dma, login_resp_dma; + struct iser_login_desc login_desc; unsigned int rx_desc_head; struct iser_rx_desc *rx_descs; u32 num_rx_descs; unsigned short scsi_sg_tablesize; unsigned int scsi_max_sectors; + bool snd_w_inv; }; /** @@ -579,9 +557,8 @@ struct iscsi_iser_task { struct iser_page_vec { u64 *pages; - int length; - int offset; - int data_size; + int npages; + struct ib_mr fake_mr; }; /** @@ -633,12 +610,14 @@ int iser_conn_terminate(struct iser_conn *iser_conn); void iser_release_work(struct work_struct *work); -void iser_rcv_completion(struct iser_rx_desc *desc, - unsigned long dto_xfer_len, - struct ib_conn *ib_conn); - -void iser_snd_completion(struct iser_tx_desc *desc, - struct ib_conn *ib_conn); +void iser_err_comp(struct ib_wc *wc, const char *type); +void iser_login_rsp(struct ib_cq *cq, struct ib_wc *wc); +void iser_task_rsp(struct ib_cq *cq, struct ib_wc *wc); +void iser_cmd_comp(struct ib_cq *cq, struct ib_wc *wc); +void iser_ctrl_comp(struct ib_cq *cq, struct ib_wc *wc); +void iser_dataout_comp(struct ib_cq *cq, struct ib_wc *wc); +void iser_reg_comp(struct ib_cq *cq, struct ib_wc *wc); +void iser_last_comp(struct ib_cq *cq, struct ib_wc *wc); void iser_task_rdma_init(struct iscsi_iser_task *task); @@ -651,7 +630,8 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir); int iser_reg_rdma_mem(struct iscsi_iser_task *task, - enum iser_data_dir dir); + enum iser_data_dir dir, + bool all_imm); void iser_unreg_rdma_mem(struct iscsi_iser_task *task, enum iser_data_dir dir); @@ -719,4 +699,28 @@ iser_tx_next_wr(struct iser_tx_desc *tx_desc) return cur_wr; } +static inline struct iser_conn * +to_iser_conn(struct ib_conn *ib_conn) +{ + return container_of(ib_conn, struct iser_conn, ib_conn); +} + +static inline struct iser_rx_desc * +iser_rx(struct ib_cqe *cqe) +{ + return container_of(cqe, struct iser_rx_desc, cqe); +} + +static inline struct iser_tx_desc * +iser_tx(struct ib_cqe *cqe) +{ + return container_of(cqe, struct iser_tx_desc, cqe); +} + +static inline struct iser_login_desc * +iser_login(struct ib_cqe *cqe) +{ + return container_of(cqe, struct iser_login_desc, cqe); +} + #endif diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index ffd00c420..ed54b388e 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -51,7 +51,7 @@ static int iser_prepare_read_cmd(struct iscsi_task *task) struct iscsi_iser_task *iser_task = task->dd_data; struct iser_mem_reg *mem_reg; int err; - struct iser_hdr *hdr = &iser_task->desc.iser_header; + struct iser_ctrl *hdr = &iser_task->desc.iser_header; struct iser_data_buf *buf_in = &iser_task->data[ISER_DIR_IN]; err = iser_dma_map_task_data(iser_task, @@ -72,7 +72,7 @@ static int iser_prepare_read_cmd(struct iscsi_task *task) return err; } - err = iser_reg_rdma_mem(iser_task, ISER_DIR_IN); + err = iser_reg_rdma_mem(iser_task, ISER_DIR_IN, false); if (err) { iser_err("Failed to set up Data-IN RDMA\n"); return err; @@ -104,7 +104,7 @@ iser_prepare_write_cmd(struct iscsi_task *task, struct iscsi_iser_task *iser_task = task->dd_data; struct iser_mem_reg *mem_reg; int err; - struct iser_hdr *hdr = &iser_task->desc.iser_header; + struct iser_ctrl *hdr = &iser_task->desc.iser_header; struct iser_data_buf *buf_out = &iser_task->data[ISER_DIR_OUT]; struct ib_sge *tx_dsg = &iser_task->desc.tx_sg[1]; @@ -126,7 +126,8 @@ iser_prepare_write_cmd(struct iscsi_task *task, return err; } - err = iser_reg_rdma_mem(iser_task, ISER_DIR_OUT); + err = iser_reg_rdma_mem(iser_task, ISER_DIR_OUT, + buf_out->data_len == imm_sz); if (err != 0) { iser_err("Failed to register write cmd RDMA mem\n"); return err; @@ -166,7 +167,7 @@ static void iser_create_send_desc(struct iser_conn *iser_conn, ib_dma_sync_single_for_cpu(device->ib_device, tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE); - memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr)); + memset(&tx_desc->iser_header, 0, sizeof(struct iser_ctrl)); tx_desc->iser_header.flags = ISER_VER; tx_desc->num_sge = 1; } @@ -174,73 +175,63 @@ static void iser_create_send_desc(struct iser_conn *iser_conn, static void iser_free_login_buf(struct iser_conn *iser_conn) { struct iser_device *device = iser_conn->ib_conn.device; + struct iser_login_desc *desc = &iser_conn->login_desc; - if (!iser_conn->login_buf) + if (!desc->req) return; - if (iser_conn->login_req_dma) - ib_dma_unmap_single(device->ib_device, - iser_conn->login_req_dma, - ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE); + ib_dma_unmap_single(device->ib_device, desc->req_dma, + ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE); - if (iser_conn->login_resp_dma) - ib_dma_unmap_single(device->ib_device, - iser_conn->login_resp_dma, - ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE); + ib_dma_unmap_single(device->ib_device, desc->rsp_dma, + ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE); - kfree(iser_conn->login_buf); + kfree(desc->req); + kfree(desc->rsp); /* make sure we never redo any unmapping */ - iser_conn->login_req_dma = 0; - iser_conn->login_resp_dma = 0; - iser_conn->login_buf = NULL; + desc->req = NULL; + desc->rsp = NULL; } static int iser_alloc_login_buf(struct iser_conn *iser_conn) { struct iser_device *device = iser_conn->ib_conn.device; - int req_err, resp_err; - - BUG_ON(device == NULL); - - iser_conn->login_buf = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN + - ISER_RX_LOGIN_SIZE, GFP_KERNEL); - if (!iser_conn->login_buf) - goto out_err; - - iser_conn->login_req_buf = iser_conn->login_buf; - iser_conn->login_resp_buf = iser_conn->login_buf + - ISCSI_DEF_MAX_RECV_SEG_LEN; - - iser_conn->login_req_dma = ib_dma_map_single(device->ib_device, - iser_conn->login_req_buf, - ISCSI_DEF_MAX_RECV_SEG_LEN, - DMA_TO_DEVICE); - - iser_conn->login_resp_dma = ib_dma_map_single(device->ib_device, - iser_conn->login_resp_buf, - ISER_RX_LOGIN_SIZE, - DMA_FROM_DEVICE); - - req_err = ib_dma_mapping_error(device->ib_device, - iser_conn->login_req_dma); - resp_err = ib_dma_mapping_error(device->ib_device, - iser_conn->login_resp_dma); - - if (req_err || resp_err) { - if (req_err) - iser_conn->login_req_dma = 0; - if (resp_err) - iser_conn->login_resp_dma = 0; - goto free_login_buf; - } + struct iser_login_desc *desc = &iser_conn->login_desc; + + desc->req = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN, GFP_KERNEL); + if (!desc->req) + return -ENOMEM; + + desc->req_dma = ib_dma_map_single(device->ib_device, desc->req, + ISCSI_DEF_MAX_RECV_SEG_LEN, + DMA_TO_DEVICE); + if (ib_dma_mapping_error(device->ib_device, + desc->req_dma)) + goto free_req; + + desc->rsp = kmalloc(ISER_RX_LOGIN_SIZE, GFP_KERNEL); + if (!desc->rsp) + goto unmap_req; + + desc->rsp_dma = ib_dma_map_single(device->ib_device, desc->rsp, + ISER_RX_LOGIN_SIZE, + DMA_FROM_DEVICE); + if (ib_dma_mapping_error(device->ib_device, + desc->rsp_dma)) + goto free_rsp; + return 0; -free_login_buf: - iser_free_login_buf(iser_conn); +free_rsp: + kfree(desc->rsp); +unmap_req: + ib_dma_unmap_single(device->ib_device, desc->req_dma, + ISCSI_DEF_MAX_RECV_SEG_LEN, + DMA_TO_DEVICE); +free_req: + kfree(desc->req); -out_err: - iser_err("unable to alloc or map login buf\n"); return -ENOMEM; } @@ -280,11 +271,11 @@ int iser_alloc_rx_descriptors(struct iser_conn *iser_conn, goto rx_desc_dma_map_failed; rx_desc->dma_addr = dma_addr; - + rx_desc->cqe.done = iser_task_rsp; rx_sg = &rx_desc->rx_sg; - rx_sg->addr = rx_desc->dma_addr; + rx_sg->addr = rx_desc->dma_addr; rx_sg->length = ISER_RX_PAYLOAD_SIZE; - rx_sg->lkey = device->pd->local_dma_lkey; + rx_sg->lkey = device->pd->local_dma_lkey; } iser_conn->rx_desc_head = 0; @@ -383,6 +374,7 @@ int iser_send_command(struct iscsi_conn *conn, /* build the tx desc regd header and add it to the tx desc dto */ tx_desc->type = ISCSI_TX_SCSI_COMMAND; + tx_desc->cqe.done = iser_cmd_comp; iser_create_send_desc(iser_conn, tx_desc); if (hdr->flags & ISCSI_FLAG_CMD_READ) { @@ -464,6 +456,7 @@ int iser_send_data_out(struct iscsi_conn *conn, } tx_desc->type = ISCSI_TX_DATAOUT; + tx_desc->cqe.done = iser_dataout_comp; tx_desc->iser_header.flags = ISER_VER; memcpy(&tx_desc->iscsi_header, hdr, sizeof(struct iscsi_hdr)); @@ -513,6 +506,7 @@ int iser_send_control(struct iscsi_conn *conn, /* build the tx desc regd header and add it to the tx desc dto */ mdesc->type = ISCSI_TX_CONTROL; + mdesc->cqe.done = iser_ctrl_comp; iser_create_send_desc(iser_conn, mdesc); device = iser_conn->ib_conn.device; @@ -520,25 +514,25 @@ int iser_send_control(struct iscsi_conn *conn, data_seg_len = ntoh24(task->hdr->dlength); if (data_seg_len > 0) { + struct iser_login_desc *desc = &iser_conn->login_desc; struct ib_sge *tx_dsg = &mdesc->tx_sg[1]; + if (task != conn->login_task) { iser_err("data present on non login task!!!\n"); goto send_control_error; } - ib_dma_sync_single_for_cpu(device->ib_device, - iser_conn->login_req_dma, task->data_count, - DMA_TO_DEVICE); + ib_dma_sync_single_for_cpu(device->ib_device, desc->req_dma, + task->data_count, DMA_TO_DEVICE); - memcpy(iser_conn->login_req_buf, task->data, task->data_count); + memcpy(desc->req, task->data, task->data_count); - ib_dma_sync_single_for_device(device->ib_device, - iser_conn->login_req_dma, task->data_count, - DMA_TO_DEVICE); + ib_dma_sync_single_for_device(device->ib_device, desc->req_dma, + task->data_count, DMA_TO_DEVICE); - tx_dsg->addr = iser_conn->login_req_dma; - tx_dsg->length = task->data_count; - tx_dsg->lkey = device->pd->local_dma_lkey; + tx_dsg->addr = desc->req_dma; + tx_dsg->length = task->data_count; + tx_dsg->lkey = device->pd->local_dma_lkey; mdesc->num_sge = 2; } @@ -562,41 +556,126 @@ send_control_error: return err; } -/** - * iser_rcv_dto_completion - recv DTO completion - */ -void iser_rcv_completion(struct iser_rx_desc *rx_desc, - unsigned long rx_xfer_len, - struct ib_conn *ib_conn) +void iser_login_rsp(struct ib_cq *cq, struct ib_wc *wc) { - struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn, - ib_conn); + struct ib_conn *ib_conn = wc->qp->qp_context; + struct iser_conn *iser_conn = to_iser_conn(ib_conn); + struct iser_login_desc *desc = iser_login(wc->wr_cqe); struct iscsi_hdr *hdr; - u64 rx_dma; - int rx_buflen, outstanding, count, err; + char *data; + int length; - /* differentiate between login to all other PDUs */ - if ((char *)rx_desc == iser_conn->login_resp_buf) { - rx_dma = iser_conn->login_resp_dma; - rx_buflen = ISER_RX_LOGIN_SIZE; - } else { - rx_dma = rx_desc->dma_addr; - rx_buflen = ISER_RX_PAYLOAD_SIZE; + if (unlikely(wc->status != IB_WC_SUCCESS)) { + iser_err_comp(wc, "login_rsp"); + return; + } + + ib_dma_sync_single_for_cpu(ib_conn->device->ib_device, + desc->rsp_dma, ISER_RX_LOGIN_SIZE, + DMA_FROM_DEVICE); + + hdr = desc->rsp + sizeof(struct iser_ctrl); + data = desc->rsp + ISER_HEADERS_LEN; + length = wc->byte_len - ISER_HEADERS_LEN; + + iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode, + hdr->itt, length); + + iscsi_iser_recv(iser_conn->iscsi_conn, hdr, data, length); + + ib_dma_sync_single_for_device(ib_conn->device->ib_device, + desc->rsp_dma, ISER_RX_LOGIN_SIZE, + DMA_FROM_DEVICE); + + ib_conn->post_recv_buf_count--; +} + +static inline void +iser_inv_desc(struct iser_fr_desc *desc, u32 rkey) +{ + if (likely(rkey == desc->rsc.mr->rkey)) + desc->rsc.mr_valid = 0; + else if (likely(rkey == desc->pi_ctx->sig_mr->rkey)) + desc->pi_ctx->sig_mr_valid = 0; +} + +static int +iser_check_remote_inv(struct iser_conn *iser_conn, + struct ib_wc *wc, + struct iscsi_hdr *hdr) +{ + if (wc->wc_flags & IB_WC_WITH_INVALIDATE) { + struct iscsi_task *task; + u32 rkey = wc->ex.invalidate_rkey; + + iser_dbg("conn %p: remote invalidation for rkey %#x\n", + iser_conn, rkey); + + if (unlikely(!iser_conn->snd_w_inv)) { + iser_err("conn %p: unexepected remote invalidation, " + "terminating connection\n", iser_conn); + return -EPROTO; + } + + task = iscsi_itt_to_ctask(iser_conn->iscsi_conn, hdr->itt); + if (likely(task)) { + struct iscsi_iser_task *iser_task = task->dd_data; + struct iser_fr_desc *desc; + + if (iser_task->dir[ISER_DIR_IN]) { + desc = iser_task->rdma_reg[ISER_DIR_IN].mem_h; + iser_inv_desc(desc, rkey); + } + + if (iser_task->dir[ISER_DIR_OUT]) { + desc = iser_task->rdma_reg[ISER_DIR_OUT].mem_h; + iser_inv_desc(desc, rkey); + } + } else { + iser_err("failed to get task for itt=%d\n", hdr->itt); + return -EINVAL; + } } - ib_dma_sync_single_for_cpu(ib_conn->device->ib_device, rx_dma, - rx_buflen, DMA_FROM_DEVICE); + return 0; +} - hdr = &rx_desc->iscsi_header; + +void iser_task_rsp(struct ib_cq *cq, struct ib_wc *wc) +{ + struct ib_conn *ib_conn = wc->qp->qp_context; + struct iser_conn *iser_conn = to_iser_conn(ib_conn); + struct iser_rx_desc *desc = iser_rx(wc->wr_cqe); + struct iscsi_hdr *hdr; + int length; + int outstanding, count, err; + + if (unlikely(wc->status != IB_WC_SUCCESS)) { + iser_err_comp(wc, "task_rsp"); + return; + } + + ib_dma_sync_single_for_cpu(ib_conn->device->ib_device, + desc->dma_addr, ISER_RX_PAYLOAD_SIZE, + DMA_FROM_DEVICE); + + hdr = &desc->iscsi_header; + length = wc->byte_len - ISER_HEADERS_LEN; iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode, - hdr->itt, (int)(rx_xfer_len - ISER_HEADERS_LEN)); + hdr->itt, length); + + if (iser_check_remote_inv(iser_conn, wc, hdr)) { + iscsi_conn_failure(iser_conn->iscsi_conn, + ISCSI_ERR_CONN_FAILED); + return; + } - iscsi_iser_recv(iser_conn->iscsi_conn, hdr, rx_desc->data, - rx_xfer_len - ISER_HEADERS_LEN); + iscsi_iser_recv(iser_conn->iscsi_conn, hdr, desc->data, length); - ib_dma_sync_single_for_device(ib_conn->device->ib_device, rx_dma, - rx_buflen, DMA_FROM_DEVICE); + ib_dma_sync_single_for_device(ib_conn->device->ib_device, + desc->dma_addr, ISER_RX_PAYLOAD_SIZE, + DMA_FROM_DEVICE); /* decrementing conn->post_recv_buf_count only --after-- freeing the * * task eliminates the need to worry on tasks which are completed in * @@ -604,9 +683,6 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc, * for the posted rx bufs refcount to become zero handles everything */ ib_conn->post_recv_buf_count--; - if (rx_dma == iser_conn->login_resp_dma) - return; - outstanding = ib_conn->post_recv_buf_count; if (outstanding + iser_conn->min_posted_rx <= iser_conn->qp_max_recv_dtos) { count = min(iser_conn->qp_max_recv_dtos - outstanding, @@ -617,26 +693,47 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc, } } -void iser_snd_completion(struct iser_tx_desc *tx_desc, - struct ib_conn *ib_conn) +void iser_cmd_comp(struct ib_cq *cq, struct ib_wc *wc) { + if (unlikely(wc->status != IB_WC_SUCCESS)) + iser_err_comp(wc, "command"); +} + +void iser_ctrl_comp(struct ib_cq *cq, struct ib_wc *wc) +{ + struct iser_tx_desc *desc = iser_tx(wc->wr_cqe); struct iscsi_task *task; - struct iser_device *device = ib_conn->device; - if (tx_desc->type == ISCSI_TX_DATAOUT) { - ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr, - ISER_HEADERS_LEN, DMA_TO_DEVICE); - kmem_cache_free(ig.desc_cache, tx_desc); - tx_desc = NULL; + if (unlikely(wc->status != IB_WC_SUCCESS)) { + iser_err_comp(wc, "control"); + return; } - if (tx_desc && tx_desc->type == ISCSI_TX_CONTROL) { - /* this arithmetic is legal by libiscsi dd_data allocation */ - task = (void *) ((long)(void *)tx_desc - - sizeof(struct iscsi_task)); - if (task->hdr->itt == RESERVED_ITT) - iscsi_put_task(task); - } + /* this arithmetic is legal by libiscsi dd_data allocation */ + task = (void *)desc - sizeof(struct iscsi_task); + if (task->hdr->itt == RESERVED_ITT) + iscsi_put_task(task); +} + +void iser_dataout_comp(struct ib_cq *cq, struct ib_wc *wc) +{ + struct iser_tx_desc *desc = iser_tx(wc->wr_cqe); + struct ib_conn *ib_conn = wc->qp->qp_context; + struct iser_device *device = ib_conn->device; + + if (unlikely(wc->status != IB_WC_SUCCESS)) + iser_err_comp(wc, "dataout"); + + ib_dma_unmap_single(device->ib_device, desc->dma_addr, + ISER_HEADERS_LEN, DMA_TO_DEVICE); + kmem_cache_free(ig.desc_cache, desc); +} + +void iser_last_comp(struct ib_cq *cq, struct ib_wc *wc) +{ + struct ib_conn *ib_conn = wc->qp->qp_context; + + complete(&ib_conn->last_comp); } void iser_task_rdma_init(struct iscsi_iser_task *iser_task) diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index ea765fb96..9a391cc5b 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -49,7 +49,7 @@ int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, struct iser_reg_resources *rsc, struct iser_mem_reg *mem_reg); -static struct iser_reg_ops fastreg_ops = { +static const struct iser_reg_ops fastreg_ops = { .alloc_reg_res = iser_alloc_fastreg_pool, .free_reg_res = iser_free_fastreg_pool, .reg_mem = iser_fast_reg_mr, @@ -58,7 +58,7 @@ static struct iser_reg_ops fastreg_ops = { .reg_desc_put = iser_reg_desc_put_fr, }; -static struct iser_reg_ops fmr_ops = { +static const struct iser_reg_ops fmr_ops = { .alloc_reg_res = iser_alloc_fmr_pool, .free_reg_res = iser_free_fmr_pool, .reg_mem = iser_fast_reg_fmr, @@ -67,19 +67,24 @@ static struct iser_reg_ops fmr_ops = { .reg_desc_put = iser_reg_desc_put_fmr, }; +void iser_reg_comp(struct ib_cq *cq, struct ib_wc *wc) +{ + iser_err_comp(wc, "memreg"); +} + int iser_assign_reg_ops(struct iser_device *device) { - struct ib_device_attr *dev_attr = &device->dev_attr; + struct ib_device *ib_dev = device->ib_device; /* Assign function handles - based on FMR support */ - if (device->ib_device->alloc_fmr && device->ib_device->dealloc_fmr && - device->ib_device->map_phys_fmr && device->ib_device->unmap_fmr) { + if (ib_dev->alloc_fmr && ib_dev->dealloc_fmr && + ib_dev->map_phys_fmr && ib_dev->unmap_fmr) { iser_info("FMR supported, using FMR for registration\n"); device->reg_ops = &fmr_ops; - } else - if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) { + } else if (ib_dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) { iser_info("FastReg supported, using FastReg for registration\n"); device->reg_ops = &fastreg_ops; + device->remote_inv_sup = iser_always_reg; } else { iser_err("IB device does not support FMRs nor FastRegs, can't register memory\n"); return -1; @@ -131,67 +136,6 @@ iser_reg_desc_put_fmr(struct ib_conn *ib_conn, { } -#define IS_4K_ALIGNED(addr) ((((unsigned long)addr) & ~MASK_4K) == 0) - -/** - * iser_sg_to_page_vec - Translates scatterlist entries to physical addresses - * and returns the length of resulting physical address array (may be less than - * the original due to possible compaction). - * - * we build a "page vec" under the assumption that the SG meets the RDMA - * alignment requirements. Other then the first and last SG elements, all - * the "internal" elements can be compacted into a list whose elements are - * dma addresses of physical pages. The code supports also the weird case - * where --few fragments of the same page-- are present in the SG as - * consecutive elements. Also, it handles one entry SG. - */ - -static int iser_sg_to_page_vec(struct iser_data_buf *data, - struct ib_device *ibdev, u64 *pages, - int *offset, int *data_size) -{ - struct scatterlist *sg, *sgl = data->sg; - u64 start_addr, end_addr, page, chunk_start = 0; - unsigned long total_sz = 0; - unsigned int dma_len; - int i, new_chunk, cur_page, last_ent = data->dma_nents - 1; - - /* compute the offset of first element */ - *offset = (u64) sgl[0].offset & ~MASK_4K; - - new_chunk = 1; - cur_page = 0; - for_each_sg(sgl, sg, data->dma_nents, i) { - start_addr = ib_sg_dma_address(ibdev, sg); - if (new_chunk) - chunk_start = start_addr; - dma_len = ib_sg_dma_len(ibdev, sg); - end_addr = start_addr + dma_len; - total_sz += dma_len; - - /* collect page fragments until aligned or end of SG list */ - if (!IS_4K_ALIGNED(end_addr) && i < last_ent) { - new_chunk = 0; - continue; - } - new_chunk = 1; - - /* address of the first page in the contiguous chunk; - masking relevant for the very first SG entry, - which might be unaligned */ - page = chunk_start & MASK_4K; - do { - pages[cur_page++] = page; - page += SIZE_4K; - } while (page < end_addr); - } - - *data_size = total_sz; - iser_dbg("page_vec->data_size:%d cur_page %d\n", - *data_size, cur_page); - return cur_page; -} - static void iser_data_buf_dump(struct iser_data_buf *data, struct ib_device *ibdev) { @@ -210,10 +154,10 @@ static void iser_dump_page_vec(struct iser_page_vec *page_vec) { int i; - iser_err("page vec length %d data size %d\n", - page_vec->length, page_vec->data_size); - for (i = 0; i < page_vec->length; i++) - iser_err("%d %lx\n",i,(unsigned long)page_vec->pages[i]); + iser_err("page vec npages %d data length %d\n", + page_vec->npages, page_vec->fake_mr.length); + for (i = 0; i < page_vec->npages; i++) + iser_err("vec[%d]: %llx\n", i, page_vec->pages[i]); } int iser_dma_map_task_data(struct iscsi_iser_task *iser_task, @@ -251,7 +195,11 @@ iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem, struct scatterlist *sg = mem->sg; reg->sge.lkey = device->pd->local_dma_lkey; - reg->rkey = device->mr->rkey; + /* + * FIXME: rework the registration code path to differentiate + * rkey/lkey use cases + */ + reg->rkey = device->mr ? device->mr->rkey : 0; reg->sge.addr = ib_sg_dma_address(device->ib_device, &sg[0]); reg->sge.length = ib_sg_dma_len(device->ib_device, &sg[0]); @@ -262,11 +210,16 @@ iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem, return 0; } -/** - * iser_reg_page_vec - Register physical memory - * - * returns: 0 on success, errno code on failure - */ +static int iser_set_page(struct ib_mr *mr, u64 addr) +{ + struct iser_page_vec *page_vec = + container_of(mr, struct iser_page_vec, fake_mr); + + page_vec->pages[page_vec->npages++] = addr; + + return 0; +} + static int iser_fast_reg_fmr(struct iscsi_iser_task *iser_task, struct iser_data_buf *mem, @@ -280,22 +233,19 @@ int iser_fast_reg_fmr(struct iscsi_iser_task *iser_task, struct ib_pool_fmr *fmr; int ret, plen; - plen = iser_sg_to_page_vec(mem, device->ib_device, - page_vec->pages, - &page_vec->offset, - &page_vec->data_size); - page_vec->length = plen; - if (plen * SIZE_4K < page_vec->data_size) { + page_vec->npages = 0; + page_vec->fake_mr.page_size = SIZE_4K; + plen = ib_sg_to_pages(&page_vec->fake_mr, mem->sg, + mem->size, iser_set_page); + if (unlikely(plen < mem->size)) { iser_err("page vec too short to hold this SG\n"); iser_data_buf_dump(mem, device->ib_device); iser_dump_page_vec(page_vec); return -EINVAL; } - fmr = ib_fmr_pool_map_phys(fmr_pool, - page_vec->pages, - page_vec->length, - page_vec->pages[0]); + fmr = ib_fmr_pool_map_phys(fmr_pool, page_vec->pages, + page_vec->npages, page_vec->pages[0]); if (IS_ERR(fmr)) { ret = PTR_ERR(fmr); iser_err("ib_fmr_pool_map_phys failed: %d\n", ret); @@ -304,8 +254,8 @@ int iser_fast_reg_fmr(struct iscsi_iser_task *iser_task, reg->sge.lkey = fmr->fmr->lkey; reg->rkey = fmr->fmr->rkey; - reg->sge.addr = page_vec->pages[0] + page_vec->offset; - reg->sge.length = page_vec->data_size; + reg->sge.addr = page_vec->fake_mr.iova; + reg->sge.length = page_vec->fake_mr.length; reg->mem_h = fmr; iser_dbg("fmr reg: lkey=0x%x, rkey=0x%x, addr=0x%llx," @@ -413,19 +363,16 @@ iser_set_prot_checks(struct scsi_cmnd *sc, u8 *mask) *mask |= ISER_CHECK_GUARD; } -static void -iser_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr) +static inline void +iser_inv_rkey(struct ib_send_wr *inv_wr, + struct ib_mr *mr, + struct ib_cqe *cqe) { - u32 rkey; - inv_wr->opcode = IB_WR_LOCAL_INV; - inv_wr->wr_id = ISER_FASTREG_LI_WRID; + inv_wr->wr_cqe = cqe; inv_wr->ex.invalidate_rkey = mr->rkey; inv_wr->send_flags = 0; inv_wr->num_sge = 0; - - rkey = ib_inc_rkey(mr->rkey); - ib_update_fast_reg_key(mr, rkey); } static int @@ -437,7 +384,9 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task, { struct iser_tx_desc *tx_desc = &iser_task->desc; struct ib_sig_attrs *sig_attrs = &tx_desc->sig_attrs; + struct ib_cqe *cqe = &iser_task->iser_conn->ib_conn.reg_cqe; struct ib_sig_handover_wr *wr; + struct ib_mr *mr = pi_ctx->sig_mr; int ret; memset(sig_attrs, 0, sizeof(*sig_attrs)); @@ -447,17 +396,19 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task, iser_set_prot_checks(iser_task->sc, &sig_attrs->check_mask); - if (!pi_ctx->sig_mr_valid) - iser_inv_rkey(iser_tx_next_wr(tx_desc), pi_ctx->sig_mr); + if (pi_ctx->sig_mr_valid) + iser_inv_rkey(iser_tx_next_wr(tx_desc), mr, cqe); + + ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey)); wr = sig_handover_wr(iser_tx_next_wr(tx_desc)); wr->wr.opcode = IB_WR_REG_SIG_MR; - wr->wr.wr_id = ISER_FASTREG_LI_WRID; + wr->wr.wr_cqe = cqe; wr->wr.sg_list = &data_reg->sge; wr->wr.num_sge = 1; wr->wr.send_flags = 0; wr->sig_attrs = sig_attrs; - wr->sig_mr = pi_ctx->sig_mr; + wr->sig_mr = mr; if (scsi_prot_sg_count(iser_task->sc)) wr->prot = &prot_reg->sge; else @@ -465,10 +416,10 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task, wr->access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_WRITE; - pi_ctx->sig_mr_valid = 0; + pi_ctx->sig_mr_valid = 1; - sig_reg->sge.lkey = pi_ctx->sig_mr->lkey; - sig_reg->rkey = pi_ctx->sig_mr->rkey; + sig_reg->sge.lkey = mr->lkey; + sig_reg->rkey = mr->rkey; sig_reg->sge.addr = 0; sig_reg->sge.length = scsi_transfer_length(iser_task->sc); @@ -485,12 +436,15 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, struct iser_mem_reg *reg) { struct iser_tx_desc *tx_desc = &iser_task->desc; + struct ib_cqe *cqe = &iser_task->iser_conn->ib_conn.reg_cqe; struct ib_mr *mr = rsc->mr; struct ib_reg_wr *wr; int n; - if (!rsc->mr_valid) - iser_inv_rkey(iser_tx_next_wr(tx_desc), mr); + if (rsc->mr_valid) + iser_inv_rkey(iser_tx_next_wr(tx_desc), mr, cqe); + + ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey)); n = ib_map_mr_sg(mr, mem->sg, mem->size, SIZE_4K); if (unlikely(n != mem->size)) { @@ -501,7 +455,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, wr = reg_wr(iser_tx_next_wr(tx_desc)); wr->wr.opcode = IB_WR_REG_MR; - wr->wr.wr_id = ISER_FASTREG_LI_WRID; + wr->wr.wr_cqe = cqe; wr->wr.send_flags = 0; wr->wr.num_sge = 0; wr->mr = mr; @@ -510,7 +464,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ; - rsc->mr_valid = 0; + rsc->mr_valid = 1; reg->sge.lkey = mr->lkey; reg->rkey = mr->rkey; @@ -554,7 +508,8 @@ iser_reg_data_sg(struct iscsi_iser_task *task, } int iser_reg_rdma_mem(struct iscsi_iser_task *task, - enum iser_data_dir dir) + enum iser_data_dir dir, + bool all_imm) { struct ib_conn *ib_conn = &task->iser_conn->ib_conn; struct iser_device *device = ib_conn->device; @@ -565,8 +520,8 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *task, bool use_dma_key; int err; - use_dma_key = (mem->dma_nents == 1 && !iser_always_reg && - scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL); + use_dma_key = mem->dma_nents == 1 && (all_imm || !iser_always_reg) && + scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL; if (!use_dma_key) { desc = device->reg_ops->reg_desc_get(ib_conn); diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 42f4da620..40c0f4978 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -44,17 +44,6 @@ #define ISER_MAX_CQ_LEN (ISER_MAX_RX_LEN + ISER_MAX_TX_LEN + \ ISCSI_ISER_MAX_CONN) -static int iser_cq_poll_limit = 512; - -static void iser_cq_tasklet_fn(unsigned long data); -static void iser_cq_callback(struct ib_cq *cq, void *cq_context); - -static void iser_cq_event_callback(struct ib_event *cause, void *context) -{ - iser_err("cq event %s (%d)\n", - ib_event_msg(cause->event), cause->event); -} - static void iser_qp_event_callback(struct ib_event *cause, void *context) { iser_err("qp event %s (%d)\n", @@ -78,59 +67,40 @@ static void iser_event_handler(struct ib_event_handler *handler, */ static int iser_create_device_ib_res(struct iser_device *device) { - struct ib_device_attr *dev_attr = &device->dev_attr; + struct ib_device *ib_dev = device->ib_device; int ret, i, max_cqe; - ret = ib_query_device(device->ib_device, dev_attr); - if (ret) { - pr_warn("Query device failed for %s\n", device->ib_device->name); - return ret; - } - ret = iser_assign_reg_ops(device); if (ret) return ret; device->comps_used = min_t(int, num_online_cpus(), - device->ib_device->num_comp_vectors); + ib_dev->num_comp_vectors); device->comps = kcalloc(device->comps_used, sizeof(*device->comps), GFP_KERNEL); if (!device->comps) goto comps_err; - max_cqe = min(ISER_MAX_CQ_LEN, dev_attr->max_cqe); + max_cqe = min(ISER_MAX_CQ_LEN, ib_dev->attrs.max_cqe); iser_info("using %d CQs, device %s supports %d vectors max_cqe %d\n", - device->comps_used, device->ib_device->name, - device->ib_device->num_comp_vectors, max_cqe); + device->comps_used, ib_dev->name, + ib_dev->num_comp_vectors, max_cqe); - device->pd = ib_alloc_pd(device->ib_device); + device->pd = ib_alloc_pd(ib_dev); if (IS_ERR(device->pd)) goto pd_err; for (i = 0; i < device->comps_used; i++) { - struct ib_cq_init_attr cq_attr = {}; struct iser_comp *comp = &device->comps[i]; - comp->device = device; - cq_attr.cqe = max_cqe; - cq_attr.comp_vector = i; - comp->cq = ib_create_cq(device->ib_device, - iser_cq_callback, - iser_cq_event_callback, - (void *)comp, - &cq_attr); + comp->cq = ib_alloc_cq(ib_dev, comp, max_cqe, i, + IB_POLL_SOFTIRQ); if (IS_ERR(comp->cq)) { comp->cq = NULL; goto cq_err; } - - if (ib_req_notify_cq(comp->cq, IB_CQ_NEXT_COMP)) - goto cq_err; - - tasklet_init(&comp->tasklet, iser_cq_tasklet_fn, - (unsigned long)comp); } if (!iser_always_reg) { @@ -140,11 +110,11 @@ static int iser_create_device_ib_res(struct iser_device *device) device->mr = ib_get_dma_mr(device->pd, access); if (IS_ERR(device->mr)) - goto dma_mr_err; + goto cq_err; } - INIT_IB_EVENT_HANDLER(&device->event_handler, device->ib_device, - iser_event_handler); + INIT_IB_EVENT_HANDLER(&device->event_handler, ib_dev, + iser_event_handler); if (ib_register_event_handler(&device->event_handler)) goto handler_err; @@ -153,15 +123,12 @@ static int iser_create_device_ib_res(struct iser_device *device) handler_err: if (device->mr) ib_dereg_mr(device->mr); -dma_mr_err: - for (i = 0; i < device->comps_used; i++) - tasklet_kill(&device->comps[i].tasklet); cq_err: for (i = 0; i < device->comps_used; i++) { struct iser_comp *comp = &device->comps[i]; if (comp->cq) - ib_destroy_cq(comp->cq); + ib_free_cq(comp->cq); } ib_dealloc_pd(device->pd); pd_err: @@ -182,8 +149,7 @@ static void iser_free_device_ib_res(struct iser_device *device) for (i = 0; i < device->comps_used; i++) { struct iser_comp *comp = &device->comps[i]; - tasklet_kill(&comp->tasklet); - ib_destroy_cq(comp->cq); + ib_free_cq(comp->cq); comp->cq = NULL; } @@ -299,7 +265,7 @@ iser_alloc_reg_res(struct ib_device *ib_device, iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret); return ret; } - res->mr_valid = 1; + res->mr_valid = 0; return 0; } @@ -336,7 +302,7 @@ iser_alloc_pi_ctx(struct ib_device *ib_device, ret = PTR_ERR(pi_ctx->sig_mr); goto sig_mr_failure; } - pi_ctx->sig_mr_valid = 1; + pi_ctx->sig_mr_valid = 0; desc->pi_ctx->sig_protected = 0; return 0; @@ -461,10 +427,9 @@ void iser_free_fastreg_pool(struct ib_conn *ib_conn) */ static int iser_create_ib_conn_res(struct ib_conn *ib_conn) { - struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn, - ib_conn); + struct iser_conn *iser_conn = to_iser_conn(ib_conn); struct iser_device *device; - struct ib_device_attr *dev_attr; + struct ib_device *ib_dev; struct ib_qp_init_attr init_attr; int ret = -ENOMEM; int index, min_index = 0; @@ -472,7 +437,7 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn) BUG_ON(ib_conn->device == NULL); device = ib_conn->device; - dev_attr = &device->dev_attr; + ib_dev = device->ib_device; memset(&init_attr, 0, sizeof init_attr); @@ -503,16 +468,16 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn) iser_conn->max_cmds = ISER_GET_MAX_XMIT_CMDS(ISER_QP_SIG_MAX_REQ_DTOS); } else { - if (dev_attr->max_qp_wr > ISER_QP_MAX_REQ_DTOS) { + if (ib_dev->attrs.max_qp_wr > ISER_QP_MAX_REQ_DTOS) { init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS + 1; iser_conn->max_cmds = ISER_GET_MAX_XMIT_CMDS(ISER_QP_MAX_REQ_DTOS); } else { - init_attr.cap.max_send_wr = dev_attr->max_qp_wr; + init_attr.cap.max_send_wr = ib_dev->attrs.max_qp_wr; iser_conn->max_cmds = - ISER_GET_MAX_XMIT_CMDS(dev_attr->max_qp_wr); + ISER_GET_MAX_XMIT_CMDS(ib_dev->attrs.max_qp_wr); iser_dbg("device %s supports max_send_wr %d\n", - device->ib_device->name, dev_attr->max_qp_wr); + device->ib_device->name, ib_dev->attrs.max_qp_wr); } } @@ -724,13 +689,13 @@ int iser_conn_terminate(struct iser_conn *iser_conn) iser_conn, err); /* post an indication that all flush errors were consumed */ - err = ib_post_send(ib_conn->qp, &ib_conn->beacon, &bad_wr); + err = ib_post_send(ib_conn->qp, &ib_conn->last, &bad_wr); if (err) { - iser_err("conn %p failed to post beacon", ib_conn); + iser_err("conn %p failed to post last wr", ib_conn); return 1; } - wait_for_completion(&ib_conn->flush_comp); + wait_for_completion(&ib_conn->last_comp); } return 1; @@ -756,7 +721,7 @@ iser_calc_scsi_params(struct iser_conn *iser_conn, sg_tablesize = DIV_ROUND_UP(max_sectors * 512, SIZE_4K); sup_sg_tablesize = min_t(unsigned, ISCSI_ISER_MAX_SG_TABLESIZE, - device->dev_attr.max_fast_reg_page_list_len); + device->ib_device->attrs.max_fast_reg_page_list_len); if (sg_tablesize > sup_sg_tablesize) { sg_tablesize = sup_sg_tablesize; @@ -799,7 +764,7 @@ static void iser_addr_handler(struct rdma_cm_id *cma_id) /* connection T10-PI support */ if (iser_pi_enable) { - if (!(device->dev_attr.device_cap_flags & + if (!(device->ib_device->attrs.device_cap_flags & IB_DEVICE_SIGNATURE_HANDOVER)) { iser_warn("T10-PI requested but not supported on %s, " "continue without T10-PI\n", @@ -841,16 +806,17 @@ static void iser_route_handler(struct rdma_cm_id *cma_id) goto failure; memset(&conn_param, 0, sizeof conn_param); - conn_param.responder_resources = device->dev_attr.max_qp_rd_atom; + conn_param.responder_resources = device->ib_device->attrs.max_qp_rd_atom; conn_param.initiator_depth = 1; conn_param.retry_count = 7; conn_param.rnr_retry_count = 6; memset(&req_hdr, 0, sizeof(req_hdr)); - req_hdr.flags = (ISER_ZBVA_NOT_SUPPORTED | - ISER_SEND_W_INV_NOT_SUPPORTED); - conn_param.private_data = (void *)&req_hdr; - conn_param.private_data_len = sizeof(struct iser_cm_hdr); + req_hdr.flags = ISER_ZBVA_NOT_SUP; + if (!device->remote_inv_sup) + req_hdr.flags |= ISER_SEND_W_INV_NOT_SUP; + conn_param.private_data = (void *)&req_hdr; + conn_param.private_data_len = sizeof(struct iser_cm_hdr); ret = rdma_connect(cma_id, &conn_param); if (ret) { @@ -863,7 +829,8 @@ failure: iser_connect_error(cma_id); } -static void iser_connected_handler(struct rdma_cm_id *cma_id) +static void iser_connected_handler(struct rdma_cm_id *cma_id, + const void *private_data) { struct iser_conn *iser_conn; struct ib_qp_attr attr; @@ -877,6 +844,15 @@ static void iser_connected_handler(struct rdma_cm_id *cma_id) (void)ib_query_qp(cma_id->qp, &attr, ~0, &init_attr); iser_info("remote qpn:%x my qpn:%x\n", attr.dest_qp_num, cma_id->qp->qp_num); + if (private_data) { + u8 flags = *(u8 *)private_data; + + iser_conn->snd_w_inv = !(flags & ISER_SEND_W_INV_NOT_SUP); + } + + iser_info("conn %p: negotiated %s invalidation\n", + iser_conn, iser_conn->snd_w_inv ? "remote" : "local"); + iser_conn->state = ISER_CONN_UP; complete(&iser_conn->up_completion); } @@ -928,7 +904,7 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve iser_route_handler(cma_id); break; case RDMA_CM_EVENT_ESTABLISHED: - iser_connected_handler(cma_id); + iser_connected_handler(cma_id, event->param.conn.private_data); break; case RDMA_CM_EVENT_ADDR_ERROR: case RDMA_CM_EVENT_ROUTE_ERROR: @@ -967,14 +943,21 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve void iser_conn_init(struct iser_conn *iser_conn) { + struct ib_conn *ib_conn = &iser_conn->ib_conn; + iser_conn->state = ISER_CONN_INIT; - iser_conn->ib_conn.post_recv_buf_count = 0; - init_completion(&iser_conn->ib_conn.flush_comp); init_completion(&iser_conn->stop_completion); init_completion(&iser_conn->ib_completion); init_completion(&iser_conn->up_completion); INIT_LIST_HEAD(&iser_conn->conn_list); mutex_init(&iser_conn->state_mutex); + + ib_conn->post_recv_buf_count = 0; + ib_conn->reg_cqe.done = iser_reg_comp; + ib_conn->last_cqe.done = iser_last_comp; + ib_conn->last.wr_cqe = &ib_conn->last_cqe; + ib_conn->last.opcode = IB_WR_SEND; + init_completion(&ib_conn->last_comp); } /** @@ -1000,9 +983,6 @@ int iser_connect(struct iser_conn *iser_conn, iser_conn->state = ISER_CONN_PENDING; - ib_conn->beacon.wr_id = ISER_BEACON_WRID; - ib_conn->beacon.opcode = IB_WR_SEND; - ib_conn->cma_id = rdma_create_id(&init_net, iser_cma_handler, (void *)iser_conn, RDMA_PS_TCP, IB_QPT_RC); @@ -1045,56 +1025,60 @@ connect_failure: int iser_post_recvl(struct iser_conn *iser_conn) { - struct ib_recv_wr rx_wr, *rx_wr_failed; struct ib_conn *ib_conn = &iser_conn->ib_conn; - struct ib_sge sge; + struct iser_login_desc *desc = &iser_conn->login_desc; + struct ib_recv_wr wr, *wr_failed; int ib_ret; - sge.addr = iser_conn->login_resp_dma; - sge.length = ISER_RX_LOGIN_SIZE; - sge.lkey = ib_conn->device->pd->local_dma_lkey; + desc->sge.addr = desc->rsp_dma; + desc->sge.length = ISER_RX_LOGIN_SIZE; + desc->sge.lkey = ib_conn->device->pd->local_dma_lkey; - rx_wr.wr_id = (uintptr_t)iser_conn->login_resp_buf; - rx_wr.sg_list = &sge; - rx_wr.num_sge = 1; - rx_wr.next = NULL; + desc->cqe.done = iser_login_rsp; + wr.wr_cqe = &desc->cqe; + wr.sg_list = &desc->sge; + wr.num_sge = 1; + wr.next = NULL; ib_conn->post_recv_buf_count++; - ib_ret = ib_post_recv(ib_conn->qp, &rx_wr, &rx_wr_failed); + ib_ret = ib_post_recv(ib_conn->qp, &wr, &wr_failed); if (ib_ret) { iser_err("ib_post_recv failed ret=%d\n", ib_ret); ib_conn->post_recv_buf_count--; } + return ib_ret; } int iser_post_recvm(struct iser_conn *iser_conn, int count) { - struct ib_recv_wr *rx_wr, *rx_wr_failed; - int i, ib_ret; struct ib_conn *ib_conn = &iser_conn->ib_conn; unsigned int my_rx_head = iser_conn->rx_desc_head; struct iser_rx_desc *rx_desc; + struct ib_recv_wr *wr, *wr_failed; + int i, ib_ret; - for (rx_wr = ib_conn->rx_wr, i = 0; i < count; i++, rx_wr++) { - rx_desc = &iser_conn->rx_descs[my_rx_head]; - rx_wr->wr_id = (uintptr_t)rx_desc; - rx_wr->sg_list = &rx_desc->rx_sg; - rx_wr->num_sge = 1; - rx_wr->next = rx_wr + 1; + for (wr = ib_conn->rx_wr, i = 0; i < count; i++, wr++) { + rx_desc = &iser_conn->rx_descs[my_rx_head]; + rx_desc->cqe.done = iser_task_rsp; + wr->wr_cqe = &rx_desc->cqe; + wr->sg_list = &rx_desc->rx_sg; + wr->num_sge = 1; + wr->next = wr + 1; my_rx_head = (my_rx_head + 1) & iser_conn->qp_max_recv_dtos_mask; } - rx_wr--; - rx_wr->next = NULL; /* mark end of work requests list */ + wr--; + wr->next = NULL; /* mark end of work requests list */ ib_conn->post_recv_buf_count += count; - ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &rx_wr_failed); + ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &wr_failed); if (ib_ret) { iser_err("ib_post_recv failed ret=%d\n", ib_ret); ib_conn->post_recv_buf_count -= count; } else iser_conn->rx_desc_head = my_rx_head; + return ib_ret; } @@ -1115,7 +1099,7 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc, DMA_TO_DEVICE); wr->next = NULL; - wr->wr_id = (uintptr_t)tx_desc; + wr->wr_cqe = &tx_desc->cqe; wr->sg_list = tx_desc->tx_sg; wr->num_sge = tx_desc->num_sge; wr->opcode = IB_WR_SEND; @@ -1129,149 +1113,6 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc, return ib_ret; } -/** - * is_iser_tx_desc - Indicate if the completion wr_id - * is a TX descriptor or not. - * @iser_conn: iser connection - * @wr_id: completion WR identifier - * - * Since we cannot rely on wc opcode in FLUSH errors - * we must work around it by checking if the wr_id address - * falls in the iser connection rx_descs buffer. If so - * it is an RX descriptor, otherwize it is a TX. - */ -static inline bool -is_iser_tx_desc(struct iser_conn *iser_conn, void *wr_id) -{ - void *start = iser_conn->rx_descs; - int len = iser_conn->num_rx_descs * sizeof(*iser_conn->rx_descs); - - if (wr_id >= start && wr_id < start + len) - return false; - - return true; -} - -/** - * iser_handle_comp_error() - Handle error completion - * @ib_conn: connection RDMA resources - * @wc: work completion - * - * Notes: We may handle a FLUSH error completion and in this case - * we only cleanup in case TX type was DATAOUT. For non-FLUSH - * error completion we should also notify iscsi layer that - * connection is failed (in case we passed bind stage). - */ -static void -iser_handle_comp_error(struct ib_conn *ib_conn, - struct ib_wc *wc) -{ - void *wr_id = (void *)(uintptr_t)wc->wr_id; - struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn, - ib_conn); - - if (wc->status != IB_WC_WR_FLUSH_ERR) - if (iser_conn->iscsi_conn) - iscsi_conn_failure(iser_conn->iscsi_conn, - ISCSI_ERR_CONN_FAILED); - - if (wc->wr_id == ISER_FASTREG_LI_WRID) - return; - - if (is_iser_tx_desc(iser_conn, wr_id)) { - struct iser_tx_desc *desc = wr_id; - - if (desc->type == ISCSI_TX_DATAOUT) - kmem_cache_free(ig.desc_cache, desc); - } else { - ib_conn->post_recv_buf_count--; - } -} - -/** - * iser_handle_wc - handle a single work completion - * @wc: work completion - * - * Soft-IRQ context, work completion can be either - * SEND or RECV, and can turn out successful or - * with error (or flush error). - */ -static void iser_handle_wc(struct ib_wc *wc) -{ - struct ib_conn *ib_conn; - struct iser_tx_desc *tx_desc; - struct iser_rx_desc *rx_desc; - - ib_conn = wc->qp->qp_context; - if (likely(wc->status == IB_WC_SUCCESS)) { - if (wc->opcode == IB_WC_RECV) { - rx_desc = (struct iser_rx_desc *)(uintptr_t)wc->wr_id; - iser_rcv_completion(rx_desc, wc->byte_len, - ib_conn); - } else - if (wc->opcode == IB_WC_SEND) { - tx_desc = (struct iser_tx_desc *)(uintptr_t)wc->wr_id; - iser_snd_completion(tx_desc, ib_conn); - } else { - iser_err("Unknown wc opcode %d\n", wc->opcode); - } - } else { - if (wc->status != IB_WC_WR_FLUSH_ERR) - iser_err("%s (%d): wr id %llx vend_err %x\n", - ib_wc_status_msg(wc->status), wc->status, - wc->wr_id, wc->vendor_err); - else - iser_dbg("%s (%d): wr id %llx\n", - ib_wc_status_msg(wc->status), wc->status, - wc->wr_id); - - if (wc->wr_id == ISER_BEACON_WRID) - /* all flush errors were consumed */ - complete(&ib_conn->flush_comp); - else - iser_handle_comp_error(ib_conn, wc); - } -} - -/** - * iser_cq_tasklet_fn - iSER completion polling loop - * @data: iSER completion context - * - * Soft-IRQ context, polling connection CQ until - * either CQ was empty or we exausted polling budget - */ -static void iser_cq_tasklet_fn(unsigned long data) -{ - struct iser_comp *comp = (struct iser_comp *)data; - struct ib_cq *cq = comp->cq; - struct ib_wc *const wcs = comp->wcs; - int i, n, completed = 0; - - while ((n = ib_poll_cq(cq, ARRAY_SIZE(comp->wcs), wcs)) > 0) { - for (i = 0; i < n; i++) - iser_handle_wc(&wcs[i]); - - completed += n; - if (completed >= iser_cq_poll_limit) - break; - } - - /* - * It is assumed here that arming CQ only once its empty - * would not cause interrupts to be missed. - */ - ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); - - iser_dbg("got %d completions\n", completed); -} - -static void iser_cq_callback(struct ib_cq *cq, void *cq_context) -{ - struct iser_comp *comp = cq_context; - - tasklet_schedule(&comp->tasklet); -} - u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir, sector_t *sector) { @@ -1319,3 +1160,21 @@ err: /* Not alot we can do here, return ambiguous guard error */ return 0x1; } + +void iser_err_comp(struct ib_wc *wc, const char *type) +{ + if (wc->status != IB_WC_WR_FLUSH_ERR) { + struct iser_conn *iser_conn = to_iser_conn(wc->qp->qp_context); + + iser_err("%s failure: %s (%d) vend_err %x\n", type, + ib_wc_status_msg(wc->status), wc->status, + wc->vendor_err); + + if (iser_conn->iscsi_conn) + iscsi_conn_failure(iser_conn->iscsi_conn, + ISCSI_ERR_CONN_FAILED); + } else { + iser_dbg("%s failure: %s (%d)\n", type, + ib_wc_status_msg(wc->status), wc->status); + } +} |