Diffstat (limited to 'net')
 net/batman-adv/originator.c                |  2
 net/ceph/ceph_fs.c                         |  3
 net/core/dev.c                             |  2
 net/core/flow_dissector.c                  | 11
 net/core/sock.c                            |  6
 net/dccp/ipv4.c                            | 16
 net/dccp/ipv6.c                            | 19
 net/dccp/proto.c                           |  4
 net/ipv4/fib_trie.c                        | 21
 net/ipv4/icmp.c                            |  4
 net/ipv4/ip_forward.c                      |  2
 net/ipv4/ip_output.c                       |  6
 net/ipv4/ip_tunnel_core.c                  | 11
 net/ipv4/ipmr.c                            |  2
 net/ipv4/route.c                           |  4
 net/ipv4/tcp.c                             |  4
 net/ipv4/tcp_dctcp.c                       | 13
 net/ipv4/tcp_ipv4.c                        | 19
 net/ipv6/icmp.c                            |  2
 net/ipv6/tcp_ipv6.c                        |  6
 net/netfilter/nf_log.c                     |  6
 net/sctp/socket.c                          | 22
 net/socket.c                               |  2
 net/sunrpc/xprtrdma/frwr_ops.c             | 39
 net/sunrpc/xprtrdma/svc_rdma_backchannel.c |  2
 net/sunrpc/xprtrdma/svc_rdma_recvfrom.c    |  2
 net/sunrpc/xprtrdma/svc_rdma_sendto.c      | 24
 net/sunrpc/xprtrdma/svc_rdma_transport.c   | 18
 net/sunrpc/xprtrdma/xprt_rdma.h            |  3
 29 files changed, 157 insertions(+), 118 deletions(-)
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 3940b5d24..3e9667e46 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -537,7 +537,7 @@ batadv_hardif_neigh_create(struct batadv_hard_iface *hard_iface,
     if (bat_priv->algo_ops->neigh.hardif_init)
         bat_priv->algo_ops->neigh.hardif_init(hardif_neigh);
 
-    hlist_add_head(&hardif_neigh->list, &hard_iface->neigh_list);
+    hlist_add_head_rcu(&hardif_neigh->list, &hard_iface->neigh_list);
 
 out:
     spin_unlock_bh(&hard_iface->neigh_list_lock);
diff --git a/net/ceph/ceph_fs.c b/net/ceph/ceph_fs.c
index 7d54e944d..dcbe67ff3 100644
--- a/net/ceph/ceph_fs.c
+++ b/net/ceph/ceph_fs.c
@@ -34,7 +34,8 @@ void ceph_file_layout_from_legacy(struct ceph_file_layout *fl,
     fl->stripe_count = le32_to_cpu(legacy->fl_stripe_count);
     fl->object_size = le32_to_cpu(legacy->fl_object_size);
     fl->pool_id = le32_to_cpu(legacy->fl_pg_pool);
-    if (fl->pool_id == 0)
+    if (fl->pool_id == 0 && fl->stripe_unit == 0 &&
+        fl->stripe_count == 0 && fl->object_size == 0)
         fl->pool_id = -1;
 }
 EXPORT_SYMBOL(ceph_file_layout_from_legacy);
diff --git a/net/core/dev.c b/net/core/dev.c
index 44b3ba462..9ce9d7284 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2484,7 +2484,7 @@ int skb_checksum_help(struct sk_buff *skb)
         goto out;
     }
 
-    *(__sum16 *)(skb->data + offset) = csum_fold(csum);
+    *(__sum16 *)(skb->data + offset) = csum_fold(csum) ?: CSUM_MANGLED_0;
 out_set_summed:
     skb->ip_summed = CHECKSUM_NONE;
 out:
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 52742a028..5550a86f7 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -118,7 +118,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
     struct flow_dissector_key_tags *key_tags;
     struct flow_dissector_key_keyid *key_keyid;
     u8 ip_proto = 0;
-    bool ret = false;
+    bool ret;
 
     if (!data) {
         data = skb->data;
@@ -481,12 +481,17 @@ ip_proto_again:
 
 out_good:
     ret = true;
-out_bad:
+    key_control->thoff = (u16)nhoff;
+out:
     key_basic->n_proto = proto;
     key_basic->ip_proto = ip_proto;
-    key_control->thoff = (u16)nhoff;
 
     return ret;
+
+out_bad:
+    ret = false;
+    key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen);
+    goto out;
 }
 EXPORT_SYMBOL(__skb_flow_dissect);
diff --git a/net/core/sock.c b/net/core/sock.c
index fd7b41edf..10acaccca 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -453,7 +453,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 EXPORT_SYMBOL(sock_queue_rcv_skb);
 
 int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
-                     const int nested, unsigned int trim_cap)
+                     const int nested, unsigned int trim_cap, bool refcounted)
 {
     int rc = NET_RX_SUCCESS;
 
@@ -487,7 +487,8 @@ int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
     bh_unlock_sock(sk);
 out:
-    sock_put(sk);
+    if (refcounted)
+        sock_put(sk);
     return rc;
 discard_and_relse:
     kfree_skb(skb);
@@ -1563,6 +1564,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
         RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
 
         newsk->sk_err      = 0;
+        newsk->sk_err_soft = 0;
         newsk->sk_priority = 0;
         newsk->sk_incoming_cpu = raw_smp_processor_id();
         atomic64_set(&newsk->sk_cookie, 0);
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 345a3aeb8..b567c8725 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -235,7 +235,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
 {
     const struct iphdr *iph = (struct iphdr *)skb->data;
     const u8 offset = iph->ihl << 2;
-    const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset);
+    const struct dccp_hdr *dh;
     struct dccp_sock *dp;
     struct inet_sock *inet;
     const int type = icmp_hdr(skb)->type;
@@ -245,11 +245,13 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
     int err;
     struct net *net = dev_net(skb->dev);
 
-    if (skb->len < offset + sizeof(*dh) ||
-        skb->len < offset + __dccp_basic_hdr_len(dh)) {
-        __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
-        return;
-    }
+    /* Only need dccph_dport & dccph_sport which are the first
+     * 4 bytes in dccp header.
+     * Our caller (icmp_socket_deliver()) already pulled 8 bytes for us.
+     */
+    BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8);
+    BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8);
+    dh = (struct dccp_hdr *)(skb->data + offset);
 
     sk = __inet_lookup_established(net, &dccp_hashinfo,
                                    iph->daddr, dh->dccph_dport,
@@ -868,7 +870,7 @@ lookup:
         goto discard_and_relse;
     nf_reset(skb);
 
-    return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4);
+    return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4, refcounted);
 
 no_dccp_socket:
     if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
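The BUILD_BUG_ON() pair above encodes the hunk's safety argument in the compiler: the error handler may only dereference fields that lie within the 8 bytes its caller is guaranteed to have pulled. A minimal userspace sketch of the same technique, using C11 static_assert and a hypothetical header struct in place of struct dccp_hdr:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/* Userspace stand-in for the kernel's offsetofend() helper. */
#define offsetofend(TYPE, MEMBER) \
    (offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER))

struct demo_hdr {           /* hypothetical wire header */
    uint16_t sport;         /* bytes 0-1 */
    uint16_t dport;         /* bytes 2-3 */
    uint32_t seq;           /* bytes 4-7 */
};

/* Compile-time proof that the fields we touch sit inside the
 * guaranteed 8 bytes; a layout change breaks the build instead of
 * the running system. */
static_assert(offsetofend(struct demo_hdr, sport) <= 8,
              "sport must lie within the first 8 bytes");
static_assert(offsetofend(struct demo_hdr, dport) <= 8,
              "dport must lie within the first 8 bytes");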
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 3828f94b2..715e5d1dc 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -70,7 +70,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
             u8 type, u8 code, int offset, __be32 info)
 {
     const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
-    const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset);
+    const struct dccp_hdr *dh;
     struct dccp_sock *dp;
     struct ipv6_pinfo *np;
     struct sock *sk;
@@ -78,12 +78,13 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
     __u64 seq;
     struct net *net = dev_net(skb->dev);
 
-    if (skb->len < offset + sizeof(*dh) ||
-        skb->len < offset + __dccp_basic_hdr_len(dh)) {
-        __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
-                          ICMP6_MIB_INERRORS);
-        return;
-    }
+    /* Only need dccph_dport & dccph_sport which are the first
+     * 4 bytes in dccp header.
+     * Our caller (icmpv6_notify()) already pulled 8 bytes for us.
+     */
+    BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8);
+    BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8);
+    dh = (struct dccp_hdr *)(skb->data + offset);
 
     sk = __inet6_lookup_established(net, &dccp_hashinfo,
                                     &hdr->daddr, dh->dccph_dport,
@@ -738,7 +739,8 @@ lookup:
     if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
         goto discard_and_relse;
 
-    return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4) ? -1 : 0;
+    return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4,
+                            refcounted) ? -1 : 0;
 
 no_dccp_socket:
     if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
@@ -956,6 +958,7 @@ static const struct inet_connection_sock_af_ops dccp_ipv6_mapped = {
     .getsockopt        = ipv6_getsockopt,
     .addr2sockaddr     = inet6_csk_addr2sockaddr,
     .sockaddr_len      = sizeof(struct sockaddr_in6),
+    .bind_conflict     = inet6_csk_bind_conflict,
 #ifdef CONFIG_COMPAT
     .compat_setsockopt = compat_ipv6_setsockopt,
     .compat_getsockopt = compat_ipv6_getsockopt,
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 41e65804d..9fe25bf63 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -1009,6 +1009,10 @@ void dccp_close(struct sock *sk, long timeout)
         __kfree_skb(skb);
     }
 
+    /* If socket has been already reset kill it. */
+    if (sk->sk_state == DCCP_CLOSED)
+        goto adjudge_to_death;
+
     if (data_was_unread) {
         /* Unread data was tossed, send an appropriate Reset Code */
         DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread);
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index e2ffc2a5c..7ef703102 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -2455,22 +2455,19 @@ static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter,
     struct key_vector *l, **tp = &iter->tnode;
     t_key key;
 
-    /* use cache location of next-to-find key */
+    /* use cached location of previously found key */
     if (iter->pos > 0 && pos >= iter->pos) {
-        pos -= iter->pos;
         key = iter->key;
     } else {
-        iter->pos = 0;
+        iter->pos = 1;
         key = 0;
     }
 
-    while ((l = leaf_walk_rcu(tp, key)) != NULL) {
+    pos -= iter->pos;
+
+    while ((l = leaf_walk_rcu(tp, key)) && (pos-- > 0)) {
         key = l->key + 1;
         iter->pos++;
-
-        if (--pos <= 0)
-            break;
-
         l = NULL;
 
         /* handle unlikely case of a key wrap */
@@ -2479,7 +2476,7 @@ static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter,
     }
 
     if (l)
-        iter->key = key;    /* remember it */
+        iter->key = l->key; /* remember it */
     else
         iter->pos = 0;      /* forget it */
 
@@ -2507,7 +2504,7 @@ static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos)
         return fib_route_get_idx(iter, *pos);
 
     iter->pos = 0;
-    iter->key = 0;
+    iter->key = KEY_MAX;
 
     return SEQ_START_TOKEN;
 }
@@ -2516,7 +2513,7 @@ static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
     struct fib_route_iter *iter = seq->private;
     struct key_vector *l = NULL;
-    t_key key = iter->key;
+    t_key key = iter->key + 1;
 
     ++*pos;
 
@@ -2525,7 +2522,7 @@ static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
     l = leaf_walk_rcu(&iter->tnode, key);
 
     if (l) {
-        iter->key = l->key + 1;
+        iter->key = l->key;
         iter->pos++;
     } else {
         iter->pos = 0;
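The fib_trie hunks change what the iterator caches: the key of the entry previously handed out, rather than a guess at the next key, so that restarting at key + 1 cannot skip or repeat routes when the trie changes between reads. A rough userspace sketch of the pattern over a sorted table (all names hypothetical; walk_from() stands in for leaf_walk_rcu() and iter_state for struct fib_route_iter):

#include <stddef.h>

static const unsigned int table[] = { 2, 5, 9, 17, 33 };

struct iter_state {
    unsigned int key;   /* key of the previously returned entry */
    size_t pos;         /* entries returned so far */
};

/* Stand-in for leaf_walk_rcu(): first key >= from, or -1 if none. */
static int walk_from(unsigned int from)
{
    for (size_t i = 0; i < sizeof(table) / sizeof(table[0]); i++)
        if (table[i] >= from)
            return (int)table[i];
    return -1;
}

/* Return the entry at logical position pos, resuming from the cached
 * key when seeking forward and rescanning from zero otherwise. */
static int get_idx(struct iter_state *it, size_t pos)
{
    unsigned int key = 0;
    int k = -1;

    if (it->pos > 0 && pos >= it->pos) {
        key = it->key + 1;          /* resume just past the cache */
        pos -= it->pos;
    } else {
        it->pos = 0;                /* cache is stale: rescan */
    }

    while ((k = walk_from(key)) >= 0) {
        key = (unsigned int)k + 1;
        it->pos++;
        if (pos-- == 0)
            break;
        k = -1;
    }

    if (k >= 0)
        it->key = (unsigned int)k;  /* remember it */
    else
        it->pos = 0;                /* forget it */
    return k;
}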
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 38abe70e5..48734ee62 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -477,7 +477,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
     fl4->flowi4_proto = IPPROTO_ICMP;
     fl4->fl4_icmp_type = type;
     fl4->fl4_icmp_code = code;
-    fl4->flowi4_oif = l3mdev_master_ifindex(skb_in->dev);
+    fl4->flowi4_oif = l3mdev_master_ifindex(skb_dst(skb_in)->dev);
 
     security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4));
     rt = __ip_route_output_key_hash(net, fl4,
@@ -502,7 +502,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
     if (err)
         goto relookup_failed;
 
-    if (inet_addr_type_dev_table(net, skb_in->dev,
+    if (inet_addr_type_dev_table(net, skb_dst(skb_in)->dev,
                                  fl4_dec.saddr) == RTN_LOCAL) {
         rt2 = __ip_route_output_key(net, &fl4_dec);
         if (IS_ERR(rt2))
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 8b4ffd216..9f0a7b966 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -117,7 +117,7 @@ int ip_forward(struct sk_buff *skb)
     if (opt->is_strictroute && rt->rt_uses_gateway)
         goto sr_failed;
 
-    IPCB(skb)->flags |= IPSKB_FORWARDED | IPSKB_FRAG_SEGS;
+    IPCB(skb)->flags |= IPSKB_FORWARDED;
     mtu = ip_dst_mtu_maybe_forward(&rt->dst, true);
     if (ip_exceeds_mtu(skb, mtu)) {
         IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index dde37fb34..307daed9a 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -223,11 +223,9 @@ static int ip_finish_output_gso(struct net *net, struct sock *sk,
     struct sk_buff *segs;
     int ret = 0;
 
-    /* common case: fragmentation of segments is not allowed,
-     * or seglen is <= mtu
+    /* common case: seglen is <= mtu
      */
-    if (((IPCB(skb)->flags & IPSKB_FRAG_SEGS) == 0) ||
-        skb_gso_validate_mtu(skb, mtu))
+    if (skb_gso_validate_mtu(skb, mtu))
         return ip_finish_output2(net, sk, skb);
 
     /* Slowpath - GSO segment length is exceeding the dst MTU.
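With IPSKB_FRAG_SEGS gone, the GSO fast path above reduces to a single question: would every segment produced from this super-packet fit the destination MTU? A loose sketch of that decision (hypothetical types; skb_gso_validate_mtu() is approximated here by a per-segment size check, which is only a simplification of what the kernel does):

#include <stdbool.h>
#include <stddef.h>

struct gso_pkt {
    size_t hdr_len;     /* network + transport header bytes */
    size_t gso_size;    /* payload bytes carried per segment */
    size_t payload_len; /* total payload across all segments */
};

/* Rough analogue of skb_gso_validate_mtu(): every segment is the
 * header plus at most gso_size bytes of payload. */
static bool gso_fits_mtu(const struct gso_pkt *p, size_t mtu)
{
    return p->hdr_len + p->gso_size <= mtu;
}

/* Fast path: hand the super-packet onward untouched when all segments
 * fit; otherwise fall back to software segmentation. */
static int gso_output(const struct gso_pkt *p, size_t mtu)
{
    if (gso_fits_mtu(p, mtu))
        return 0;   /* ip_finish_output2() equivalent */
    return 1;       /* slow path: segment, then fragment */
}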
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 0f227db0e..afd6b5968 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -63,7 +63,6 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
     int pkt_len = skb->len - skb_inner_network_offset(skb);
     struct net *net = dev_net(rt->dst.dev);
     struct net_device *dev = skb->dev;
-    int skb_iif = skb->skb_iif;
     struct iphdr *iph;
     int err;
 
@@ -73,16 +72,6 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
     skb_dst_set(skb, &rt->dst);
     memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
 
-    if (skb_iif && !(df & htons(IP_DF))) {
-        /* Arrived from an ingress interface, got encapsulated, with
-         * fragmentation of encapulating frames allowed.
-         * If skb is gso, the resulting encapsulated network segments
-         * may exceed dst mtu.
-         * Allow IP Fragmentation of segments.
-         */
-        IPCB(skb)->flags |= IPSKB_FRAG_SEGS;
-    }
-
     /* Push down and install the IP header. */
     skb_push(skb, sizeof(struct iphdr));
     skb_reset_network_header(skb);
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 5f006e13d..27089f5eb 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1749,7 +1749,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
         vif->dev->stats.tx_bytes += skb->len;
     }
 
-    IPCB(skb)->flags |= IPSKB_FORWARDED | IPSKB_FRAG_SEGS;
+    IPCB(skb)->flags |= IPSKB_FORWARDED;
 
     /* RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
      * not only before forwarding, but after forwarding on all output
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 62c3ed0b7..2f23ef1a8 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -753,7 +753,9 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
             goto reject_redirect;
     }
 
-    n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw);
+    n = __ipv4_neigh_lookup(rt->dst.dev, new_gw);
+    if (!n)
+        n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev);
     if (!IS_ERR(n)) {
         if (!(n->nud_state & NUD_VALID)) {
             neigh_event_send(n, NULL);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 2fa1a4a6d..ccf8390ec 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1146,7 +1146,7 @@ restart:
 
     err = -EPIPE;
     if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
-        goto out_err;
+        goto do_error;
 
     sg = !!(sk->sk_route_caps & NETIF_F_SG);
 
@@ -1220,7 +1220,7 @@ new_segment:
 
             if (!skb_can_coalesce(skb, i, pfrag->page,
                                   pfrag->offset)) {
-                if (i == sysctl_max_skb_frags || !sg) {
+                if (i >= sysctl_max_skb_frags || !sg) {
                     tcp_mark_push(tp, skb);
                     goto new_segment;
                 }
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index 10d728b68..ab37c6775 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -56,6 +56,7 @@ struct dctcp {
     u32 next_seq;
     u32 ce_state;
     u32 delayed_ack_reserved;
+    u32 loss_cwnd;
 };
 
 static unsigned int dctcp_shift_g __read_mostly = 4; /* g = 1/2^4 */
@@ -96,6 +97,7 @@ static void dctcp_init(struct sock *sk)
         ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA);
 
         ca->delayed_ack_reserved = 0;
+        ca->loss_cwnd = 0;
         ca->ce_state = 0;
 
         dctcp_reset(tp, ca);
@@ -111,9 +113,10 @@ static void dctcp_init(struct sock *sk)
 
 static u32 dctcp_ssthresh(struct sock *sk)
 {
-    const struct dctcp *ca = inet_csk_ca(sk);
+    struct dctcp *ca = inet_csk_ca(sk);
     struct tcp_sock *tp = tcp_sk(sk);
 
+    ca->loss_cwnd = tp->snd_cwnd;
     return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U);
 }
 
@@ -308,12 +311,20 @@ static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr,
     return 0;
 }
 
+static u32 dctcp_cwnd_undo(struct sock *sk)
+{
+    const struct dctcp *ca = inet_csk_ca(sk);
+
+    return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
+}
+
 static struct tcp_congestion_ops dctcp __read_mostly = {
     .init         = dctcp_init,
     .in_ack_event = dctcp_update_alpha,
     .cwnd_event   = dctcp_cwnd_event,
     .ssthresh     = dctcp_ssthresh,
     .cong_avoid   = tcp_reno_cong_avoid,
+    .undo_cwnd    = dctcp_cwnd_undo,
     .set_state    = dctcp_state,
     .get_info     = dctcp_get_info,
     .flags        = TCP_CONG_NEEDS_ECN,
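The loss_cwnd/undo_cwnd addition is the standard congestion-control undo pattern: snapshot the window when loss is declared, so a later "that loss was spurious" signal can restore it. A compact sketch with a hypothetical connection struct (alpha is the DCTCP fixed-point estimate in [0, 1024], matching the >> 11 scaling above):

#include <stdint.h>

struct cc_state {
    uint32_t cwnd;      /* congestion window, in segments */
    uint32_t loss_cwnd; /* snapshot taken when loss is declared */
    uint32_t alpha;     /* DCTCP congestion estimate, 0..1024 */
};

/* Mirrors dctcp_ssthresh(): cut cwnd in proportion to alpha, never
 * below two segments, remembering the pre-cut value for undo. */
static uint32_t ssthresh(struct cc_state *s)
{
    uint32_t cut = (s->cwnd * s->alpha) >> 11;  /* at most cwnd / 2 */

    s->loss_cwnd = s->cwnd;
    return s->cwnd - cut > 2 ? s->cwnd - cut : 2;
}

/* Mirrors dctcp_cwnd_undo(): restore the snapshot, but never shrink
 * the window that is already in effect. */
static uint32_t cwnd_undo(const struct cc_state *s)
{
    return s->cwnd > s->loss_cwnd ? s->cwnd : s->loss_cwnd;
}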
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index ba2ab6fcf..349ac2a8c 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1564,6 +1564,21 @@ bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(tcp_prequeue);
 
+int tcp_filter(struct sock *sk, struct sk_buff *skb)
+{
+    struct tcphdr *th = (struct tcphdr *)skb->data;
+    unsigned int eaten = skb->len;
+    int err;
+
+    err = sk_filter_trim_cap(sk, skb, th->doff * 4);
+    if (!err) {
+        eaten -= skb->len;
+        TCP_SKB_CB(skb)->end_seq -= eaten;
+    }
+    return err;
+}
+EXPORT_SYMBOL(tcp_filter);
+
 /*
  *  From tcp_input.c
  */
@@ -1675,8 +1690,10 @@ process:
 
     nf_reset(skb);
 
-    if (sk_filter(sk, skb))
+    if (tcp_filter(sk, skb))
         goto discard_and_relse;
+    th = (const struct tcphdr *)skb->data;
+    iph = ip_hdr(skb);
 
     skb->dev = NULL;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index bd59c343d..7370ad2e6 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -448,7 +448,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
     if (__ipv6_addr_needs_scope_id(addr_type))
         iif = skb->dev->ifindex;
     else
-        iif = l3mdev_master_ifindex(skb->dev);
+        iif = l3mdev_master_ifindex(skb_dst(skb)->dev);
 
     /*
      *  Must not send error if the source does not uniquely
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 40a58d7d1..162044030 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1245,7 +1245,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
     if (skb->protocol == htons(ETH_P_IP))
         return tcp_v4_do_rcv(sk, skb);
 
-    if (sk_filter(sk, skb))
+    if (tcp_filter(sk, skb))
         goto discard;
 
     /*
@@ -1479,8 +1479,10 @@ process:
     if (tcp_v6_inbound_md5_hash(sk, skb))
         goto discard_and_relse;
 
-    if (sk_filter(sk, skb))
+    if (tcp_filter(sk, skb))
         goto discard_and_relse;
+    th = (const struct tcphdr *)skb->data;
+    hdr = ipv6_hdr(skb);
 
     skb->dev = NULL;
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index aa5847a16..1df2c8dac 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -420,7 +420,7 @@ static int nf_log_proc_dostring(struct ctl_table *table, int write,
     char buf[NFLOGGER_NAME_LEN];
     int r = 0;
     int tindex = (unsigned long)table->extra1;
-    struct net *net = current->nsproxy->net_ns;
+    struct net *net = table->extra2;
 
     if (write) {
         struct ctl_table tmp = *table;
@@ -474,7 +474,6 @@ static int netfilter_log_sysctl_init(struct net *net)
                       3, "%d", i);
             nf_log_sysctl_table[i].procname =
                 nf_log_sysctl_fnames[i];
-            nf_log_sysctl_table[i].data = NULL;
             nf_log_sysctl_table[i].maxlen = NFLOGGER_NAME_LEN;
             nf_log_sysctl_table[i].mode = 0644;
             nf_log_sysctl_table[i].proc_handler =
@@ -484,6 +483,9 @@ static int netfilter_log_sysctl_init(struct net *net)
         }
     }
 
+    for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++)
+        table[i].extra2 = net;
+
     net->nf.nf_log_dir_header = register_net_sysctl(net,
                                                     "net/netfilter/nf_log",
                                                     table);
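A recurring detail in the tcp_filter() conversions above is that th and iph/hdr are recomputed immediately after the filter runs: sk_filter_trim_cap() may trim and reallocate the underlying buffer, so any header pointer taken earlier can be left dangling. A userspace illustration of the hazard (hypothetical names; realloc stands in for skb reallocation):

#include <stdlib.h>

struct pkt {
    unsigned char *data;
    size_t len;
};

/* Stand-in for sk_filter_trim_cap(): may shrink the packet and, like
 * skb reallocation in the kernel, may move the buffer. */
static int filter_trim(struct pkt *p, size_t keep)
{
    unsigned char *nd;

    if (keep > p->len)
        keep = p->len;
    nd = realloc(p->data, keep);    /* may move the buffer */
    if (!nd && keep)
        return -1;
    p->data = nd;
    p->len = keep;
    return 0;
}

/* Correct usage: re-derive header pointers AFTER the filter, exactly
 * as the hunks above reload th and iph/hdr. A pointer into p->data
 * taken before filter_trim() must not be used afterwards. */
static int rcv(struct pkt *p)
{
    unsigned char *hdr;

    if (filter_trim(p, 64))
        return -1;
    hdr = p->data;                  /* refreshed after the filter */
    return p->len ? (int)hdr[0] : 0;
}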
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index baccbf3c1..7b0e059bf 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1214,9 +1214,12 @@ static int __sctp_connect(struct sock *sk,
 
     timeo = sock_sndtimeo(sk, f_flags & O_NONBLOCK);
 
-    err = sctp_wait_for_connect(asoc, &timeo);
-    if ((err == 0 || err == -EINPROGRESS) && assoc_id)
+    if (assoc_id)
         *assoc_id = asoc->assoc_id;
+    err = sctp_wait_for_connect(asoc, &timeo);
+    /* Note: the asoc may be freed after the return of
+     * sctp_wait_for_connect.
+     */
 
     /* Don't free association on exit. */
     asoc = NULL;
@@ -4278,19 +4281,18 @@ static void sctp_shutdown(struct sock *sk, int how)
 {
     struct net *net = sock_net(sk);
     struct sctp_endpoint *ep;
-    struct sctp_association *asoc;
 
     if (!sctp_style(sk, TCP))
         return;
 
-    if (how & SEND_SHUTDOWN) {
+    ep = sctp_sk(sk)->ep;
+    if (how & SEND_SHUTDOWN && !list_empty(&ep->asocs)) {
+        struct sctp_association *asoc;
+
         sk->sk_state = SCTP_SS_CLOSING;
-        ep = sctp_sk(sk)->ep;
-        if (!list_empty(&ep->asocs)) {
-            asoc = list_entry(ep->asocs.next,
-                              struct sctp_association, asocs);
-            sctp_primitive_SHUTDOWN(net, asoc, NULL);
-        }
+        asoc = list_entry(ep->asocs.next,
+                          struct sctp_association, asocs);
+        sctp_primitive_SHUTDOWN(net, asoc, NULL);
     }
 }
diff --git a/net/socket.c b/net/socket.c
index a1bd16106..03bc2c289 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2041,6 +2041,8 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
         if (err)
             break;
         ++datagrams;
+        if (msg_data_left(&msg_sys))
+            break;
         cond_resched();
     }
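The __sctp_connect() reordering above is a use-after-free fix with a simple shape: copy everything you need out of an object before calling a function that may outlive it. A sketch with hypothetical types (the free inside wait_for_connect() models the association being torn down while the caller sleeps):

#include <stdlib.h>

struct assoc {
    int id;
};

/* Stand-in for sctp_wait_for_connect(): may block, and the object may
 * be gone by the time it returns; modelled here by freeing it. */
static int wait_for_connect(struct assoc *a)
{
    free(a);
    return 0;
}

static int connect_and_report(struct assoc *a, int *id_out)
{
    if (id_out)
        *id_out = a->id;        /* read BEFORE the wait */
    return wait_for_connect(a); /* *a may be freed in here */
}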
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 892b5e1d9..2761377dc 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -44,18 +44,20 @@
  * being done.
  *
  * When the underlying transport disconnects, MRs are left in one of
- * three states:
+ * four states:
  *
  * INVALID:	The MR was not in use before the QP entered ERROR state.
- *		(Or, the LOCAL_INV WR has not completed or flushed yet).
- *
- * STALE:	The MR was being registered or unregistered when the QP
- *		entered ERROR state, and the pending WR was flushed.
 *
 * VALID:	The MR was registered before the QP entered ERROR state.
 *
- * When frwr_op_map encounters STALE and VALID MRs, they are recovered
- * with ib_dereg_mr and then are re-initialized. Beause MR recovery
+ * FLUSHED_FR:	The MR was being registered when the QP entered ERROR
+ *		state, and the pending WR was flushed.
+ *
+ * FLUSHED_LI:	The MR was being invalidated when the QP entered ERROR
+ *		state, and the pending WR was flushed.
+ *
+ * When frwr_op_map encounters FLUSHED and VALID MRs, they are recovered
+ * with ib_dereg_mr and then are re-initialized. Because MR recovery
 * allocates fresh resources, it is deferred to a workqueue, and the
 * recovered MRs are placed back on the rb_mws list when recovery is
 * complete. frwr_op_map allocates another MR for the current RPC while
@@ -175,12 +177,15 @@ __frwr_reset_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r)
 static void
 frwr_op_recover_mr(struct rpcrdma_mw *mw)
 {
+    enum rpcrdma_frmr_state state = mw->frmr.fr_state;
     struct rpcrdma_xprt *r_xprt = mw->mw_xprt;
     struct rpcrdma_ia *ia = &r_xprt->rx_ia;
     int rc;
 
     rc = __frwr_reset_mr(ia, mw);
-    ib_dma_unmap_sg(ia->ri_device, mw->mw_sg, mw->mw_nents, mw->mw_dir);
+    if (state != FRMR_FLUSHED_LI)
+        ib_dma_unmap_sg(ia->ri_device,
+                        mw->mw_sg, mw->mw_nents, mw->mw_dir);
     if (rc)
         goto out_release;
 
@@ -261,10 +266,8 @@ frwr_op_maxpages(struct rpcrdma_xprt *r_xprt)
 }
 
 static void
-__frwr_sendcompletion_flush(struct ib_wc *wc, struct rpcrdma_frmr *frmr,
-                            const char *wr)
+__frwr_sendcompletion_flush(struct ib_wc *wc, const char *wr)
 {
-    frmr->fr_state = FRMR_IS_STALE;
     if (wc->status != IB_WC_WR_FLUSH_ERR)
         pr_err("rpcrdma: %s: %s (%u/0x%x)\n",
                wr, ib_wc_status_msg(wc->status),
@@ -287,7 +290,8 @@ frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
     if (wc->status != IB_WC_SUCCESS) {
         cqe = wc->wr_cqe;
         frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
-        __frwr_sendcompletion_flush(wc, frmr, "fastreg");
+        frmr->fr_state = FRMR_FLUSHED_FR;
+        __frwr_sendcompletion_flush(wc, "fastreg");
     }
 }
 
@@ -307,7 +311,8 @@ frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
     if (wc->status != IB_WC_SUCCESS) {
         cqe = wc->wr_cqe;
         frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
-        __frwr_sendcompletion_flush(wc, frmr, "localinv");
+        frmr->fr_state = FRMR_FLUSHED_LI;
+        __frwr_sendcompletion_flush(wc, "localinv");
     }
 }
 
@@ -327,9 +332,11 @@ frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
     /* WARNING: Only wr_cqe and status are reliable at this point */
     cqe = wc->wr_cqe;
     frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
-    if (wc->status != IB_WC_SUCCESS)
-        __frwr_sendcompletion_flush(wc, frmr, "localinv");
-    complete_all(&frmr->fr_linv_done);
+    if (wc->status != IB_WC_SUCCESS) {
+        frmr->fr_state = FRMR_FLUSHED_LI;
+        __frwr_sendcompletion_flush(wc, "localinv");
+    }
+    complete(&frmr->fr_linv_done);
 }
 
 /* Post a REG_MR Work Request to register a memory region
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index a2a7519b0..cd0c55814 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -129,7 +129,7 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
         ret = -EIO;
         goto out_unmap;
     }
-    atomic_inc(&rdma->sc_dma_used);
+    svc_rdma_count_mappings(rdma, ctxt);
 
     memset(&send_wr, 0, sizeof(send_wr));
     ctxt->cqe.done = svc_rdma_wc_send;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 2c25606f2..ad1df979b 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -159,7 +159,7 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
                                    ctxt->sge[pno].addr);
         if (ret)
             goto err;
-        atomic_inc(&xprt->sc_dma_used);
+        svc_rdma_count_mappings(xprt, ctxt);
 
         ctxt->sge[pno].lkey = xprt->sc_pd->local_dma_lkey;
         ctxt->sge[pno].length = len;
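Splitting FRMR_IS_STALE into FLUSHED_FR and FLUSHED_LI gives frwr_op_recover_mr() the one bit it needs: whether the scatterlist is still DMA-mapped. A flushed LOCAL_INV means the invalidation path already owns the unmap, so recovery must not unmap a second time. A minimal sketch of that decision (hypothetical types):

enum mr_state {
    MR_INVALID,     /* ready to be used */
    MR_VALID,       /* in use */
    MR_FLUSHED_FR,  /* FASTREG WR flushed: still DMA-mapped */
    MR_FLUSHED_LI,  /* LOCAL_INV WR flushed: unmap owned elsewhere */
};

struct mr {
    enum mr_state state;
    int mapped;             /* 1 while the scatterlist is mapped */
};

static void dma_unmap(struct mr *m)
{
    m->mapped = 0;
}

/* Mirrors the frwr_op_recover_mr() change above: only the FASTREG
 * flush (and the VALID case) still owns a live mapping. */
static void recover(struct mr *m)
{
    if (m->state != MR_FLUSHED_LI)
        dma_unmap(m);
    m->state = MR_INVALID;
}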
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 54d533300..3b95b19fc 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -280,7 +280,7 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
         if (ib_dma_mapping_error(xprt->sc_cm_id->device,
                                  sge[sge_no].addr))
             goto err;
-        atomic_inc(&xprt->sc_dma_used);
+        svc_rdma_count_mappings(xprt, ctxt);
         sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey;
         ctxt->count++;
         sge_off = 0;
@@ -489,7 +489,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
                         ctxt->sge[0].length, DMA_TO_DEVICE);
     if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr))
         goto err;
-    atomic_inc(&rdma->sc_dma_used);
+    svc_rdma_count_mappings(rdma, ctxt);
 
     ctxt->direction = DMA_TO_DEVICE;
 
@@ -505,7 +505,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
         if (ib_dma_mapping_error(rdma->sc_cm_id->device,
                                  ctxt->sge[sge_no].addr))
             goto err;
-        atomic_inc(&rdma->sc_dma_used);
+        svc_rdma_count_mappings(rdma, ctxt);
         ctxt->sge[sge_no].lkey = rdma->sc_pd->local_dma_lkey;
         ctxt->sge[sge_no].length = sge_bytes;
     }
@@ -523,23 +523,9 @@ static int send_reply(struct svcxprt_rdma *rdma,
         ctxt->pages[page_no+1] = rqstp->rq_respages[page_no];
         ctxt->count++;
         rqstp->rq_respages[page_no] = NULL;
-        /*
-         * If there are more pages than SGE, terminate SGE
-         * list so that svc_rdma_unmap_dma doesn't attempt to
-         * unmap garbage.
-         */
-        if (page_no+1 >= sge_no)
-            ctxt->sge[page_no+1].length = 0;
     }
     rqstp->rq_next_page = rqstp->rq_respages + 1;
 
-    /* The loop above bumps sc_dma_used for each sge. The
-     * xdr_buf.tail gets a separate sge, but resides in the
-     * same page as xdr_buf.head. Don't count it twice.
-     */
-    if (sge_no > ctxt->count)
-        atomic_dec(&rdma->sc_dma_used);
-
     if (sge_no > rdma->sc_max_sge) {
         pr_err("svcrdma: Too many sges (%d)\n", sge_no);
         goto err;
@@ -635,7 +621,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
     ret = send_reply(rdma, rqstp, res_page, rdma_resp, vec,
                      inline_bytes);
     if (ret < 0)
-        goto err1;
+        goto err0;
 
     svc_rdma_put_req_map(rdma, vec);
     dprintk("svcrdma: send_reply returns %d\n", ret);
@@ -692,7 +678,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
         svc_rdma_put_context(ctxt, 1);
         return;
     }
-    atomic_inc(&xprt->sc_dma_used);
+    svc_rdma_count_mappings(xprt, ctxt);
 
     /* Prepare SEND WR */
     memset(&err_wr, 0, sizeof(err_wr));
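Every atomic_inc(&...->sc_dma_used) in these files becomes svc_rdma_count_mappings(); the helper body is not shown in this diff, but the transport.c hunk below implies it also bumps a per-context mapped_sges count. That count lets teardown unmap exactly the SGEs that were mapped, instead of inferring the set from sge[].length sentinels. A rough, simplified shape of the pattern (hypothetical userspace types; the real svc_rdma_unmap_dma() additionally skips SGEs whose lkey is not the local DMA lkey):

struct op_ctxt {
    unsigned int mapped_sges;   /* SGEs mapped for this operation */
};

struct xprt {
    unsigned int dma_used;      /* transport-wide mapping total */
};

/* Presumed shape of svc_rdma_count_mappings(): one call per
 * successful DMA mapping. */
static void count_mapping(struct xprt *x, struct op_ctxt *c)
{
    c->mapped_sges++;
    x->dma_used++;
}

/* Teardown releases exactly mapped_sges entries and returns the
 * total in one step, as the reworked svc_rdma_unmap_dma() does. */
static void unmap_all(struct xprt *x, struct op_ctxt *c)
{
    x->dma_used -= c->mapped_sges;
    c->mapped_sges = 0;
}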
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index dd9440137..924271c9e 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -198,6 +198,7 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
 
 out:
     ctxt->count = 0;
+    ctxt->mapped_sges = 0;
     ctxt->frmr = NULL;
     return ctxt;
 
@@ -221,22 +222,27 @@ out_empty:
 void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
 {
     struct svcxprt_rdma *xprt = ctxt->xprt;
-    int i;
-    for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) {
+    struct ib_device *device = xprt->sc_cm_id->device;
+    u32 lkey = xprt->sc_pd->local_dma_lkey;
+    unsigned int i, count;
+
+    for (count = 0, i = 0; i < ctxt->mapped_sges; i++) {
         /*
         * Unmap the DMA addr in the SGE if the lkey matches
         * the local_dma_lkey, otherwise, ignore it since it is
         * an FRMR lkey and will be unmapped later when the
         * last WR that uses it completes.
         */
-        if (ctxt->sge[i].lkey == xprt->sc_pd->local_dma_lkey) {
-            atomic_dec(&xprt->sc_dma_used);
-            ib_dma_unmap_page(xprt->sc_cm_id->device,
+        if (ctxt->sge[i].lkey == lkey) {
+            count++;
+            ib_dma_unmap_page(device,
                               ctxt->sge[i].addr,
                               ctxt->sge[i].length,
                               ctxt->direction);
         }
     }
+    ctxt->mapped_sges = 0;
+    atomic_sub(count, &xprt->sc_dma_used);
 }
 
 void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
@@ -600,7 +606,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt, gfp_t flags)
                              DMA_FROM_DEVICE);
         if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa))
             goto err_put_ctxt;
-        atomic_inc(&xprt->sc_dma_used);
+        svc_rdma_count_mappings(xprt, ctxt);
         ctxt->sge[sge_no].addr = pa;
         ctxt->sge[sge_no].length = PAGE_SIZE;
         ctxt->sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index a71b0f589..edc03445b 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -207,7 +207,8 @@ struct rpcrdma_rep {
 enum rpcrdma_frmr_state {
     FRMR_IS_INVALID,    /* ready to be used */
     FRMR_IS_VALID,      /* in use */
-    FRMR_IS_STALE,      /* failed completion */
+    FRMR_FLUSHED_FR,    /* flushed FASTREG WR */
+    FRMR_FLUSHED_LI,    /* flushed LOCALINV WR */
 };
 
 struct rpcrdma_frmr {