diff options
Diffstat (limited to 'net/rds')
-rw-r--r-- | net/rds/ib_cm.c | 2 | ||||
-rw-r--r-- | net/rds/ib_frmr.c | 2 | ||||
-rw-r--r-- | net/rds/loop.c | 5 | ||||
-rw-r--r-- | net/rds/rds.h | 2 | ||||
-rw-r--r-- | net/rds/recv.c | 2 | ||||
-rw-r--r-- | net/rds/send.c | 1 | ||||
-rw-r--r-- | net/rds/sysctl.c | 3 | ||||
-rw-r--r-- | net/rds/tcp.c | 83 | ||||
-rw-r--r-- | net/rds/tcp.h | 3 | ||||
-rw-r--r-- | net/rds/tcp_connect.c | 30 | ||||
-rw-r--r-- | net/rds/tcp_listen.c | 31 | ||||
-rw-r--r-- | net/rds/tcp_recv.c | 20 | ||||
-rw-r--r-- | net/rds/tcp_send.c | 18 | ||||
-rw-r--r-- | net/rds/threads.c | 10 | ||||
-rw-r--r-- | net/rds/transport.c | 3 |
15 files changed, 146 insertions, 69 deletions
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 310cabce2..7c2a65a6a 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -111,7 +111,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even } } - if (conn->c_version < RDS_PROTOCOL(3,1)) { + if (conn->c_version < RDS_PROTOCOL(3, 1)) { printk(KERN_NOTICE "RDS/IB: Connection to %pI4 version %u.%u failed," " no longer supported\n", &conn->c_faddr, diff --git a/net/rds/ib_frmr.c b/net/rds/ib_frmr.c index 93ff038ea..d921adc62 100644 --- a/net/rds/ib_frmr.c +++ b/net/rds/ib_frmr.c @@ -111,7 +111,7 @@ static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr) cpu_relax(); } - ret = ib_map_mr_sg_zbva(frmr->mr, ibmr->sg, ibmr->sg_len, PAGE_SIZE); + ret = ib_map_mr_sg_zbva(frmr->mr, ibmr->sg, ibmr->sg_len, 0, PAGE_SIZE); if (unlikely(ret != ibmr->sg_len)) return ret < 0 ? ret : -EINVAL; diff --git a/net/rds/loop.c b/net/rds/loop.c index 6b12b6854..814173b46 100644 --- a/net/rds/loop.c +++ b/net/rds/loop.c @@ -95,8 +95,9 @@ out: */ static void rds_loop_inc_free(struct rds_incoming *inc) { - struct rds_message *rm = container_of(inc, struct rds_message, m_inc); - rds_message_put(rm); + struct rds_message *rm = container_of(inc, struct rds_message, m_inc); + + rds_message_put(rm); } /* we need to at least give the thread something to succeed */ diff --git a/net/rds/rds.h b/net/rds/rds.h index 80256b08e..387df5f32 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -74,6 +74,7 @@ enum { RDS_CONN_CONNECTING, RDS_CONN_DISCONNECTING, RDS_CONN_UP, + RDS_CONN_RESETTING, RDS_CONN_ERROR, }; @@ -813,6 +814,7 @@ void rds_connect_worker(struct work_struct *); void rds_shutdown_worker(struct work_struct *); void rds_send_worker(struct work_struct *); void rds_recv_worker(struct work_struct *); +void rds_connect_path_complete(struct rds_connection *conn, int curr); void rds_connect_complete(struct rds_connection *conn); /* transport.c */ diff --git a/net/rds/recv.c b/net/rds/recv.c index c0be1ecd1..8413f6c99 
100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -561,5 +561,7 @@ void rds_inc_info_copy(struct rds_incoming *inc, minfo.fport = inc->i_hdr.h_dport; } + minfo.flags = 0; + rds_info_copy(iter, &minfo, sizeof(minfo)); } diff --git a/net/rds/send.c b/net/rds/send.c index c9cdb358e..b1962f8e3 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -99,6 +99,7 @@ void rds_send_reset(struct rds_connection *conn) list_splice_init(&conn->c_retrans, &conn->c_send_queue); spin_unlock_irqrestore(&conn->c_lock, flags); } +EXPORT_SYMBOL_GPL(rds_send_reset); static int acquire_in_xmit(struct rds_connection *conn) { diff --git a/net/rds/sysctl.c b/net/rds/sysctl.c index c173f69e1..e381bbcd9 100644 --- a/net/rds/sysctl.c +++ b/net/rds/sysctl.c @@ -102,7 +102,8 @@ int rds_sysctl_init(void) rds_sysctl_reconnect_min = msecs_to_jiffies(1); rds_sysctl_reconnect_min_jiffies = rds_sysctl_reconnect_min; - rds_sysctl_reg_table = register_net_sysctl(&init_net,"net/rds", rds_sysctl_rds_table); + rds_sysctl_reg_table = + register_net_sysctl(&init_net, "net/rds", rds_sysctl_rds_table); if (!rds_sysctl_reg_table) return -ENOMEM; return 0; diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 86187dad1..c8a7b4c90 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -126,9 +126,81 @@ void rds_tcp_restore_callbacks(struct socket *sock, } /* - * This is the only path that sets tc->t_sock. Send and receive trust that - * it is set. The RDS_CONN_UP bit protects those paths from being - * called while it isn't set. + * rds_tcp_reset_callbacks() switches to the new sock and + * returns the existing tc->t_sock. + * + * The only functions that set tc->t_sock are rds_tcp_set_callbacks + * and rds_tcp_reset_callbacks. Send and receive trust that + * it is set. The absence of RDS_CONN_UP bit protects those paths + * from being called while it isn't set. 
+ */ +void rds_tcp_reset_callbacks(struct socket *sock, + struct rds_connection *conn) +{ + struct rds_tcp_connection *tc = conn->c_transport_data; + struct socket *osock = tc->t_sock; + + if (!osock) + goto newsock; + + /* Need to resolve a duelling SYN between peers. + * We have an outstanding SYN to this peer, which may + * potentially have transitioned to the RDS_CONN_UP state, + * so we must quiesce any send threads before resetting + * c_transport_data. We quiesce these threads by setting + * c_state to something other than RDS_CONN_UP, and then + * waiting for any existing threads in rds_send_xmit to + * complete release_in_xmit(). (Subsequent threads entering + * rds_send_xmit() will bail on !rds_conn_up().) + * + * However an incoming syn-ack at this point would end up + * marking the conn as RDS_CONN_UP, and would again permit + * rds_send_xmit() threads through, so ideally we would + * synchronize on RDS_CONN_UP after lock_sock(), but cannot + * do that: waiting on !RDS_IN_XMIT after lock_sock() may + * end up deadlocking with tcp_sendmsg(), and the RDS_IN_XMIT + * would not get set. 
As a result, we set c_state to + * RDS_CONN_RESETTING, to ensure that rds_tcp_state_change + * cannot mark rds_conn_path_up() in the window before lock_sock() + */ + atomic_set(&conn->c_state, RDS_CONN_RESETTING); + wait_event(conn->c_waitq, !test_bit(RDS_IN_XMIT, &conn->c_flags)); + lock_sock(osock->sk); + /* reset receive side state for rds_tcp_data_recv() for osock */ + if (tc->t_tinc) { + rds_inc_put(&tc->t_tinc->ti_inc); + tc->t_tinc = NULL; + } + tc->t_tinc_hdr_rem = sizeof(struct rds_header); + tc->t_tinc_data_rem = 0; + tc->t_sock = NULL; + + write_lock_bh(&osock->sk->sk_callback_lock); + + osock->sk->sk_user_data = NULL; + osock->sk->sk_data_ready = tc->t_orig_data_ready; + osock->sk->sk_write_space = tc->t_orig_write_space; + osock->sk->sk_state_change = tc->t_orig_state_change; + write_unlock_bh(&osock->sk->sk_callback_lock); + release_sock(osock->sk); + sock_release(osock); +newsock: + rds_send_reset(conn); + lock_sock(sock->sk); + write_lock_bh(&sock->sk->sk_callback_lock); + tc->t_sock = sock; + sock->sk->sk_user_data = conn; + sock->sk->sk_data_ready = rds_tcp_data_ready; + sock->sk->sk_write_space = rds_tcp_write_space; + sock->sk->sk_state_change = rds_tcp_state_change; + + write_unlock_bh(&sock->sk->sk_callback_lock); + release_sock(sock->sk); +} + +/* Add tc to rds_tcp_tc_list and set tc->t_sock. 
See comments + * above rds_tcp_reset_callbacks for notes about synchronization + * with data path */ void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn) { @@ -544,7 +616,7 @@ static int rds_tcp_init(void) ret = rds_tcp_recv_init(); if (ret) - goto out_slab; + goto out_pernet; ret = rds_trans_register(&rds_tcp_transport); if (ret) @@ -556,8 +628,9 @@ static int rds_tcp_init(void) out_recv: rds_tcp_recv_exit(); -out_slab: +out_pernet: unregister_pernet_subsys(&rds_tcp_net_ops); +out_slab: kmem_cache_destroy(rds_tcp_conn_slab); out: return ret; diff --git a/net/rds/tcp.h b/net/rds/tcp.h index 41c228300..7940babf6 100644 --- a/net/rds/tcp.h +++ b/net/rds/tcp.h @@ -50,6 +50,7 @@ struct rds_tcp_statistics { void rds_tcp_tune(struct socket *sock); void rds_tcp_nonagle(struct socket *sock); void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn); +void rds_tcp_reset_callbacks(struct socket *sock, struct rds_connection *conn); void rds_tcp_restore_callbacks(struct socket *sock, struct rds_tcp_connection *tc); u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc); @@ -82,7 +83,7 @@ int rds_tcp_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to); void rds_tcp_xmit_prepare(struct rds_connection *conn); void rds_tcp_xmit_complete(struct rds_connection *conn); int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm, - unsigned int hdr_off, unsigned int sg, unsigned int off); + unsigned int hdr_off, unsigned int sg, unsigned int off); void rds_tcp_write_space(struct sock *sk); /* tcp_stats.c */ diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index 49a3fcfed..f6e95d60d 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -43,7 +43,7 @@ void rds_tcp_state_change(struct sock *sk) struct rds_connection *conn; struct rds_tcp_connection *tc; - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); conn = sk->sk_user_data; if (!conn) { state_change = sk->sk_state_change; @@ 
-54,22 +54,22 @@ void rds_tcp_state_change(struct sock *sk) rdsdebug("sock %p state_change to %d\n", tc->t_sock, sk->sk_state); - switch(sk->sk_state) { - /* ignore connecting sockets as they make progress */ - case TCP_SYN_SENT: - case TCP_SYN_RECV: - break; - case TCP_ESTABLISHED: - rds_connect_complete(conn); - break; - case TCP_CLOSE_WAIT: - case TCP_CLOSE: - rds_conn_drop(conn); - default: - break; + switch (sk->sk_state) { + /* ignore connecting sockets as they make progress */ + case TCP_SYN_SENT: + case TCP_SYN_RECV: + break; + case TCP_ESTABLISHED: + rds_connect_path_complete(conn, RDS_CONN_CONNECTING); + break; + case TCP_CLOSE_WAIT: + case TCP_CLOSE: + rds_conn_drop(conn); + default: + break; } out: - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); state_change(sk); } diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index be263cdf2..245542ca4 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -78,7 +78,9 @@ int rds_tcp_accept_one(struct socket *sock) struct inet_sock *inet; struct rds_tcp_connection *rs_tcp = NULL; int conn_state; - struct sock *nsk; + + if (!sock) /* module unload or netns delete in progress */ + return -ENETUNREACH; ret = sock_create_kern(sock_net(sock->sk), sock->sk->sk_family, sock->sk->sk_type, sock->sk->sk_protocol, @@ -129,28 +131,25 @@ int rds_tcp_accept_one(struct socket *sock) * so we must quiesce any send threads before resetting * c_transport_data. 
*/ - wait_event(conn->c_waitq, - !test_bit(RDS_IN_XMIT, &conn->c_flags)); - if (ntohl(inet->inet_saddr) < ntohl(inet->inet_daddr)) { + if (ntohl(inet->inet_saddr) < ntohl(inet->inet_daddr) || + !conn->c_outgoing) { goto rst_nsk; - } else if (rs_tcp->t_sock) { - rds_tcp_restore_callbacks(rs_tcp->t_sock, rs_tcp); + } else { + rds_tcp_reset_callbacks(new_sock, conn); conn->c_outgoing = 0; + /* rds_connect_path_complete() marks RDS_CONN_UP */ + rds_connect_path_complete(conn, RDS_CONN_RESETTING); } + } else { + rds_tcp_set_callbacks(new_sock, conn); + rds_connect_path_complete(conn, RDS_CONN_CONNECTING); } - rds_tcp_set_callbacks(new_sock, conn); - rds_connect_complete(conn); /* marks RDS_CONN_UP */ new_sock = NULL; ret = 0; goto out; rst_nsk: /* reset the newly returned accept sock and bail */ - nsk = new_sock->sk; - rds_tcp_stats_inc(s_tcp_listen_closed_stale); - nsk->sk_user_data = NULL; - nsk->sk_prot->disconnect(nsk, 0); - tcp_done(nsk); - new_sock = NULL; + kernel_sock_shutdown(new_sock, SHUT_RDWR); ret = 0; out: if (rs_tcp) @@ -166,7 +165,7 @@ void rds_tcp_listen_data_ready(struct sock *sk) rdsdebug("listen data ready sk %p\n", sk); - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); ready = sk->sk_user_data; if (!ready) { /* check for teardown race */ ready = sk->sk_data_ready; @@ -183,7 +182,7 @@ void rds_tcp_listen_data_ready(struct sock *sk) rds_tcp_accept_work(sk); out: - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); ready(sk); } diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c index 27a992154..6e6a7111a 100644 --- a/net/rds/tcp_recv.c +++ b/net/rds/tcp_recv.c @@ -171,7 +171,7 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb, while (left) { if (!tinc) { tinc = kmem_cache_alloc(rds_tcp_incoming_slab, - arg->gfp); + arg->gfp); if (!tinc) { desc->error = -ENOMEM; goto out; @@ -207,22 +207,14 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb, } if 
(left && tc->t_tinc_data_rem) { - clone = skb_clone(skb, arg->gfp); + to_copy = min(tc->t_tinc_data_rem, left); + + clone = pskb_extract(skb, offset, to_copy, arg->gfp); if (!clone) { desc->error = -ENOMEM; goto out; } - to_copy = min(tc->t_tinc_data_rem, left); - if (!pskb_pull(clone, offset) || - pskb_trim(clone, to_copy)) { - pr_warn("rds_tcp_data_recv: pull/trim failed " - "left %zu data_rem %zu skb_len %d\n", - left, tc->t_tinc_data_rem, skb->len); - kfree_skb(clone); - desc->error = -ENOMEM; - goto out; - } skb_queue_tail(&tinc->ti_skb_list, clone); rdsdebug("skb %p data %p len %d off %u to_copy %zu -> " @@ -309,7 +301,7 @@ void rds_tcp_data_ready(struct sock *sk) rdsdebug("data ready sk %p\n", sk); - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); conn = sk->sk_user_data; if (!conn) { /* check for teardown race */ ready = sk->sk_data_ready; @@ -323,7 +315,7 @@ void rds_tcp_data_ready(struct sock *sk) if (rds_tcp_read_sock(conn, GFP_ATOMIC) == -ENOMEM) queue_delayed_work(rds_wq, &conn->c_recv_w, 0); out: - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); ready(sk); } diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c index 2894e6095..618be69c9 100644 --- a/net/rds/tcp_send.c +++ b/net/rds/tcp_send.c @@ -66,19 +66,19 @@ void rds_tcp_xmit_complete(struct rds_connection *conn) static int rds_tcp_sendmsg(struct socket *sock, void *data, unsigned int len) { struct kvec vec = { - .iov_base = data, - .iov_len = len, + .iov_base = data, + .iov_len = len, + }; + struct msghdr msg = { + .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL, }; - struct msghdr msg = { - .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL, - }; return kernel_sendmsg(sock, &msg, &vec, 1, vec.iov_len); } /* the core send_sem serializes this with other xmit and shutdown */ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm, - unsigned int hdr_off, unsigned int sg, unsigned int off) + unsigned int hdr_off, unsigned int sg, unsigned int off) { 
struct rds_tcp_connection *tc = conn->c_transport_data; int done = 0; @@ -180,7 +180,7 @@ void rds_tcp_write_space(struct sock *sk) struct rds_connection *conn; struct rds_tcp_connection *tc; - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); conn = sk->sk_user_data; if (!conn) { write_space = sk->sk_write_space; @@ -196,11 +196,11 @@ void rds_tcp_write_space(struct sock *sk) tc->t_last_seen_una = rds_tcp_snd_una(tc); rds_send_drop_acked(conn, rds_tcp_snd_una(tc), rds_tcp_is_acked); - if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) + if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) queue_delayed_work(rds_wq, &conn->c_send_w, 0); out: - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); /* * write_space is only called when data leaves tcp's send queue if diff --git a/net/rds/threads.c b/net/rds/threads.c index 454aa6d23..4a3230457 100644 --- a/net/rds/threads.c +++ b/net/rds/threads.c @@ -71,9 +71,9 @@ struct workqueue_struct *rds_wq; EXPORT_SYMBOL_GPL(rds_wq); -void rds_connect_complete(struct rds_connection *conn) +void rds_connect_path_complete(struct rds_connection *conn, int curr) { - if (!rds_conn_transition(conn, RDS_CONN_CONNECTING, RDS_CONN_UP)) { + if (!rds_conn_transition(conn, curr, RDS_CONN_UP)) { printk(KERN_WARNING "%s: Cannot transition to state UP, " "current state is %d\n", __func__, @@ -90,6 +90,12 @@ void rds_connect_complete(struct rds_connection *conn) queue_delayed_work(rds_wq, &conn->c_send_w, 0); queue_delayed_work(rds_wq, &conn->c_recv_w, 0); } +EXPORT_SYMBOL_GPL(rds_connect_path_complete); + +void rds_connect_complete(struct rds_connection *conn) +{ + rds_connect_path_complete(conn, RDS_CONN_CONNECTING); +} EXPORT_SYMBOL_GPL(rds_connect_complete); /* diff --git a/net/rds/transport.c b/net/rds/transport.c index f3afd1d60..2ffd3e30c 100644 --- a/net/rds/transport.c +++ b/net/rds/transport.c @@ -140,8 +140,7 @@ unsigned int rds_trans_stats_info_copy(struct 
rds_info_iterator *iter, rds_info_iter_unmap(iter); down_read(&rds_trans_sem); - for (i = 0; i < RDS_TRANS_COUNT; i++) - { + for (i = 0; i < RDS_TRANS_COUNT; i++) { trans = transports[i]; if (!trans || !trans->stats_info_copy) continue; |