diff options
Diffstat (limited to 'net/rds/ib_recv.c')
-rw-r--r-- | net/rds/ib_recv.c | 140 |
1 files changed, 39 insertions, 101 deletions
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index f43831e41..977fb8606 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -305,7 +305,7 @@ static int rds_ib_recv_refill_one(struct rds_connection *conn, gfp_t slab_mask = GFP_NOWAIT; gfp_t page_mask = GFP_NOWAIT; - if (gfp & __GFP_WAIT) { + if (gfp & __GFP_DIRECT_RECLAIM) { slab_mask = GFP_KERNEL; page_mask = GFP_HIGHUSER; } @@ -379,7 +379,7 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp) struct ib_recv_wr *failed_wr; unsigned int posted = 0; int ret = 0; - bool can_wait = !!(gfp & __GFP_WAIT); + bool can_wait = !!(gfp & __GFP_DIRECT_RECLAIM); u32 pos; /* the goal here is to just make sure that someone, somewhere @@ -596,8 +596,7 @@ void rds_ib_recv_init_ack(struct rds_ib_connection *ic) * wr_id and avoids working with the ring in that case. */ #ifndef KERNEL_HAS_ATOMIC64 -static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq, - int ack_required) +void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq, int ack_required) { unsigned long flags; @@ -622,8 +621,7 @@ static u64 rds_ib_get_ack(struct rds_ib_connection *ic) return seq; } #else -static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq, - int ack_required) +void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq, int ack_required) { atomic64_set(&ic->i_ack_next, seq); if (ack_required) { @@ -830,20 +828,6 @@ static void rds_ib_cong_recv(struct rds_connection *conn, rds_cong_map_updated(map, uncongested); } -/* - * Rings are posted with all the allocations they'll need to queue the - * incoming message to the receiving socket so this can't fail. - * All fragments start with a header, so we can make sure we're not receiving - * garbage, and we can tell a small 8 byte fragment from an ACK frame. - */ -struct rds_ib_ack_state { - u64 ack_next; - u64 ack_recv; - unsigned int ack_required:1; - unsigned int ack_next_valid:1; - unsigned int ack_recv_valid:1; -}; - static void rds_ib_process_recv(struct rds_connection *conn, struct rds_ib_recv_work *recv, u32 data_len, struct rds_ib_ack_state *state) @@ -969,96 +953,50 @@ static void rds_ib_process_recv(struct rds_connection *conn, } } -/* - * Plucking the oldest entry from the ring can be done concurrently with - * the thread refilling the ring. Each ring operation is protected by - * spinlocks and the transient state of refilling doesn't change the - * recording of which entry is oldest. - * - * This relies on IB only calling one cq comp_handler for each cq so that - * there will only be one caller of rds_recv_incoming() per RDS connection. - */ -void rds_ib_recv_cq_comp_handler(struct ib_cq *cq, void *context) -{ - struct rds_connection *conn = context; - struct rds_ib_connection *ic = conn->c_transport_data; - - rdsdebug("conn %p cq %p\n", conn, cq); - - rds_ib_stats_inc(s_ib_rx_cq_call); - - tasklet_schedule(&ic->i_recv_tasklet); -} - -static inline void rds_poll_cq(struct rds_ib_connection *ic, - struct rds_ib_ack_state *state) +void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic, + struct ib_wc *wc, + struct rds_ib_ack_state *state) { struct rds_connection *conn = ic->conn; - struct ib_wc wc; struct rds_ib_recv_work *recv; - while (ib_poll_cq(ic->i_recv_cq, 1, &wc) > 0) { - rdsdebug("wc wr_id 0x%llx status %u (%s) byte_len %u imm_data %u\n", - (unsigned long long)wc.wr_id, wc.status, - ib_wc_status_msg(wc.status), wc.byte_len, - be32_to_cpu(wc.ex.imm_data)); - rds_ib_stats_inc(s_ib_rx_cq_event); + rdsdebug("wc wr_id 0x%llx status %u (%s) byte_len %u imm_data %u\n", + (unsigned long long)wc->wr_id, wc->status, + ib_wc_status_msg(wc->status), wc->byte_len, + be32_to_cpu(wc->ex.imm_data)); - recv = &ic->i_recvs[rds_ib_ring_oldest(&ic->i_recv_ring)]; - - ib_dma_unmap_sg(ic->i_cm_id->device, &recv->r_frag->f_sg, 1, DMA_FROM_DEVICE); - - /* - * Also process recvs in connecting state because it is possible - * to get a recv completion _before_ the rdmacm ESTABLISHED - * event is processed. - */ - if (wc.status == IB_WC_SUCCESS) { - rds_ib_process_recv(conn, recv, wc.byte_len, state); - } else { - /* We expect errors as the qp is drained during shutdown */ - if (rds_conn_up(conn) || rds_conn_connecting(conn)) - rds_ib_conn_error(conn, "recv completion on %pI4 had " - "status %u (%s), disconnecting and " - "reconnecting\n", &conn->c_faddr, - wc.status, - ib_wc_status_msg(wc.status)); - } + rds_ib_stats_inc(s_ib_rx_cq_event); + recv = &ic->i_recvs[rds_ib_ring_oldest(&ic->i_recv_ring)]; + ib_dma_unmap_sg(ic->i_cm_id->device, &recv->r_frag->f_sg, 1, + DMA_FROM_DEVICE); - /* - * rds_ib_process_recv() doesn't always consume the frag, and - * we might not have called it at all if the wc didn't indicate - * success. We already unmapped the frag's pages, though, and - * the following rds_ib_ring_free() call tells the refill path - * that it will not find an allocated frag here. Make sure we - * keep that promise by freeing a frag that's still on the ring. - */ - if (recv->r_frag) { - rds_ib_frag_free(ic, recv->r_frag); - recv->r_frag = NULL; - } - rds_ib_ring_free(&ic->i_recv_ring, 1); + /* Also process recvs in connecting state because it is possible + * to get a recv completion _before_ the rdmacm ESTABLISHED + * event is processed. + */ + if (wc->status == IB_WC_SUCCESS) { + rds_ib_process_recv(conn, recv, wc->byte_len, state); + } else { + /* We expect errors as the qp is drained during shutdown */ + if (rds_conn_up(conn) || rds_conn_connecting(conn)) + rds_ib_conn_error(conn, "recv completion on %pI4 had status %u (%s), disconnecting and reconnecting\n", + &conn->c_faddr, + wc->status, + ib_wc_status_msg(wc->status)); } -} -void rds_ib_recv_tasklet_fn(unsigned long data) -{ - struct rds_ib_connection *ic = (struct rds_ib_connection *) data; - struct rds_connection *conn = ic->conn; - struct rds_ib_ack_state state = { 0, }; - - rds_poll_cq(ic, &state); - ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED); - rds_poll_cq(ic, &state); - - if (state.ack_next_valid) - rds_ib_set_ack(ic, state.ack_next, state.ack_required); - if (state.ack_recv_valid && state.ack_recv > ic->i_ack_recv) { - rds_send_drop_acked(conn, state.ack_recv, NULL); - ic->i_ack_recv = state.ack_recv; + /* rds_ib_process_recv() doesn't always consume the frag, and + * we might not have called it at all if the wc didn't indicate + * success. We already unmapped the frag's pages, though, and + * the following rds_ib_ring_free() call tells the refill path + * that it will not find an allocated frag here. Make sure we + * keep that promise by freeing a frag that's still on the ring. + */ + if (recv->r_frag) { + rds_ib_frag_free(ic, recv->r_frag); + recv->r_frag = NULL; } - if (rds_conn_up(conn)) - rds_ib_attempt_ack(ic); + rds_ib_ring_free(&ic->i_recv_ring, 1); /* If we ever end up with a really empty receive ring, we're * in deep trouble, as the sender will definitely see RNR |