diff options
Diffstat (limited to 'net/ipv6')
30 files changed, 961 insertions, 442 deletions
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 2e8c06108..0f3f19997 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -48,4 +48,5 @@ obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o ifneq ($(CONFIG_IPV6),) obj-$(CONFIG_NET_UDP_TUNNEL) += ip6_udp_tunnel.o +obj-y += mcast_snoop.o endif diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 37b70e82b..21c2c818d 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2121,6 +2121,8 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, fn = fib6_locate(&table->tb6_root, pfx, plen, NULL, 0); if (!fn) goto out; + + noflags |= RTF_CACHE; for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { if (rt->dst.dev->ifindex != dev->ifindex) continue; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index eef63b394..7de52b651 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -167,7 +167,7 @@ lookup_protocol: WARN_ON(!answer_prot->slab); err = -ENOBUFS; - sk = sk_alloc(net, PF_INET6, GFP_KERNEL, answer_prot); + sk = sk_alloc(net, PF_INET6, GFP_KERNEL, answer_prot, kern); if (!sk) goto out; @@ -362,7 +362,8 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) np->saddr = addr->sin6_addr; /* Make sure we are allowed to bind here. */ - if (sk->sk_prot->get_port(sk, snum)) { + if ((snum || !inet->bind_address_no_port) && + sk->sk_prot->get_port(sk, snum)) { inet_reset_saddr(sk); err = -EADDRINUSE; goto out; @@ -768,6 +769,7 @@ static int __net_init inet6_net_init(struct net *net) net->ipv6.sysctl.auto_flowlabels = 0; net->ipv6.sysctl.idgen_retries = 3; net->ipv6.sysctl.idgen_delay = 1 * HZ; + net->ipv6.sysctl.flowlabel_state_ranges = 1; atomic_set(&net->ipv6.fib6_sernum, 1); err = ipv6_init_mibs(net); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 62d908e64..b10a88986 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -40,7 +40,7 @@ static bool ipv6_mapped_addr_any(const struct in6_addr *a) return ipv6_addr_v4mapped(a) && (a->s6_addr32[3] == 0); } -int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) +static int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; struct inet_sock *inet = inet_sk(sk); @@ -56,7 +56,7 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (usin->sin6_family == AF_INET) { if (__ipv6_only_sock(sk)) return -EAFNOSUPPORT; - err = ip4_datagram_connect(sk, uaddr, addr_len); + err = __ip4_datagram_connect(sk, uaddr, addr_len); goto ipv4_connected; } @@ -98,9 +98,9 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) sin.sin_addr.s_addr = daddr->s6_addr32[3]; sin.sin_port = usin->sin6_port; - err = ip4_datagram_connect(sk, - (struct sockaddr *) &sin, - sizeof(sin)); + err = __ip4_datagram_connect(sk, + (struct sockaddr *) &sin, + sizeof(sin)); ipv4_connected: if (err) @@ -204,6 +204,16 @@ out: fl6_sock_release(flowlabel); return err; } + +int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) +{ + int res; + + lock_sock(sk); + res = __ip6_datagram_connect(sk, uaddr, addr_len); + release_sock(sk); + return res; +} EXPORT_SYMBOL_GPL(ip6_datagram_connect); int ip6_datagram_connect_v6_only(struct sock *sk, struct sockaddr *uaddr, diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 7c07ce36a..060a60b2f 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -76,7 +76,7 @@ static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqihlen) len = ALIGN(len, crypto_tfm_ctx_alignment()); } - len += sizeof(struct aead_givcrypt_request) + crypto_aead_reqsize(aead); + len += sizeof(struct aead_request) + crypto_aead_reqsize(aead); len = ALIGN(len, __alignof__(struct scatterlist)); len += sizeof(struct scatterlist) * nfrags; @@ -96,17 +96,6 @@ static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int seqhilen) crypto_aead_alignmask(aead) + 1) : tmp + seqhilen; } -static inline struct aead_givcrypt_request *esp_tmp_givreq( - struct crypto_aead *aead, u8 *iv) -{ - struct aead_givcrypt_request *req; - - req = (void *)PTR_ALIGN(iv + crypto_aead_ivsize(aead), - crypto_tfm_ctx_alignment()); - aead_givcrypt_set_tfm(req, aead); - return req; -} - static inline struct aead_request *esp_tmp_req(struct crypto_aead *aead, u8 *iv) { struct aead_request *req; @@ -125,14 +114,6 @@ static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead, __alignof__(struct scatterlist)); } -static inline struct scatterlist *esp_givreq_sg( - struct crypto_aead *aead, struct aead_givcrypt_request *req) -{ - return (void *)ALIGN((unsigned long)(req + 1) + - crypto_aead_reqsize(aead), - __alignof__(struct scatterlist)); -} - static void esp_output_done(struct crypto_async_request *base, int err) { struct sk_buff *skb = base->data; @@ -141,32 +122,57 @@ static void esp_output_done(struct crypto_async_request *base, int err) xfrm_output_resume(skb, err); } +/* Move ESP header back into place. */ +static void esp_restore_header(struct sk_buff *skb, unsigned int offset) +{ + struct ip_esp_hdr *esph = (void *)(skb->data + offset); + void *tmp = ESP_SKB_CB(skb)->tmp; + __be32 *seqhi = esp_tmp_seqhi(tmp); + + esph->seq_no = esph->spi; + esph->spi = *seqhi; +} + +static void esp_output_restore_header(struct sk_buff *skb) +{ + esp_restore_header(skb, skb_transport_offset(skb) - sizeof(__be32)); +} + +static void esp_output_done_esn(struct crypto_async_request *base, int err) +{ + struct sk_buff *skb = base->data; + + esp_output_restore_header(skb); + esp_output_done(base, err); +} + static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) { int err; struct ip_esp_hdr *esph; struct crypto_aead *aead; - struct aead_givcrypt_request *req; + struct aead_request *req; struct scatterlist *sg; - struct scatterlist *asg; struct sk_buff *trailer; void *tmp; int blksize; int clen; int alen; int plen; + int ivlen; int tfclen; int nfrags; int assoclen; - int sglists; int seqhilen; u8 *iv; u8 *tail; __be32 *seqhi; + __be64 seqno; /* skb is pure payload to encrypt */ aead = x->data; alen = crypto_aead_authsize(aead); + ivlen = crypto_aead_ivsize(aead); tfclen = 0; if (x->tfcpad) { @@ -187,16 +193,14 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) nfrags = err; assoclen = sizeof(*esph); - sglists = 1; seqhilen = 0; if (x->props.flags & XFRM_STATE_ESN) { - sglists += 2; seqhilen += sizeof(__be32); assoclen += seqhilen; } - tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen); + tmp = esp_alloc_tmp(aead, nfrags, seqhilen); if (!tmp) { err = -ENOMEM; goto error; @@ -204,9 +208,8 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) seqhi = esp_tmp_seqhi(tmp); iv = esp_tmp_iv(aead, tmp, seqhilen); - req = esp_tmp_givreq(aead, iv); - asg = esp_givreq_sg(aead, req); - sg = asg + sglists; + req = esp_tmp_req(aead, iv); + sg = esp_req_sg(aead, req); /* Fill padding... */ tail = skb_tail_pointer(trailer); @@ -227,37 +230,53 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) esph = ip_esp_hdr(skb); *skb_mac_header(skb) = IPPROTO_ESP; - esph->spi = x->id.spi; esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); + aead_request_set_callback(req, 0, esp_output_done, skb); + + /* For ESN we move the header forward by 4 bytes to + * accomodate the high bits. We will move it back after + * encryption. + */ + if ((x->props.flags & XFRM_STATE_ESN)) { + esph = (void *)(skb_transport_header(skb) - sizeof(__be32)); + *seqhi = esph->spi; + esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi); + aead_request_set_callback(req, 0, esp_output_done_esn, skb); + } + + esph->spi = x->id.spi; + sg_init_table(sg, nfrags); skb_to_sgvec(skb, sg, - esph->enc_data + crypto_aead_ivsize(aead) - skb->data, - clen + alen); + (unsigned char *)esph - skb->data, + assoclen + ivlen + clen + alen); - if ((x->props.flags & XFRM_STATE_ESN)) { - sg_init_table(asg, 3); - sg_set_buf(asg, &esph->spi, sizeof(__be32)); - *seqhi = htonl(XFRM_SKB_CB(skb)->seq.output.hi); - sg_set_buf(asg + 1, seqhi, seqhilen); - sg_set_buf(asg + 2, &esph->seq_no, sizeof(__be32)); - } else - sg_init_one(asg, esph, sizeof(*esph)); - - aead_givcrypt_set_callback(req, 0, esp_output_done, skb); - aead_givcrypt_set_crypt(req, sg, sg, clen, iv); - aead_givcrypt_set_assoc(req, asg, assoclen); - aead_givcrypt_set_giv(req, esph->enc_data, - XFRM_SKB_CB(skb)->seq.output.low + - ((u64)XFRM_SKB_CB(skb)->seq.output.hi << 32)); + aead_request_set_crypt(req, sg, sg, ivlen + clen, iv); + aead_request_set_ad(req, assoclen); + + seqno = cpu_to_be64(XFRM_SKB_CB(skb)->seq.output.low + + ((u64)XFRM_SKB_CB(skb)->seq.output.hi << 32)); + + memset(iv, 0, ivlen); + memcpy(iv + ivlen - min(ivlen, 8), (u8 *)&seqno + 8 - min(ivlen, 8), + min(ivlen, 8)); ESP_SKB_CB(skb)->tmp = tmp; - err = crypto_aead_givencrypt(req); - if (err == -EINPROGRESS) + err = crypto_aead_encrypt(req); + + switch (err) { + case -EINPROGRESS: goto error; - if (err == -EBUSY) + case -EBUSY: err = NET_XMIT_DROP; + break; + + case 0: + if ((x->props.flags & XFRM_STATE_ESN)) + esp_output_restore_header(skb); + } kfree(tmp); @@ -318,25 +337,38 @@ static void esp_input_done(struct crypto_async_request *base, int err) xfrm_input_resume(skb, esp_input_done2(skb, err)); } +static void esp_input_restore_header(struct sk_buff *skb) +{ + esp_restore_header(skb, 0); + __skb_pull(skb, 4); +} + +static void esp_input_done_esn(struct crypto_async_request *base, int err) +{ + struct sk_buff *skb = base->data; + + esp_input_restore_header(skb); + esp_input_done(base, err); +} + static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) { struct ip_esp_hdr *esph; struct crypto_aead *aead = x->data; struct aead_request *req; struct sk_buff *trailer; - int elen = skb->len - sizeof(*esph) - crypto_aead_ivsize(aead); + int ivlen = crypto_aead_ivsize(aead); + int elen = skb->len - sizeof(*esph) - ivlen; int nfrags; int assoclen; - int sglists; int seqhilen; int ret = 0; void *tmp; __be32 *seqhi; u8 *iv; struct scatterlist *sg; - struct scatterlist *asg; - if (!pskb_may_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead))) { + if (!pskb_may_pull(skb, sizeof(*esph) + ivlen)) { ret = -EINVAL; goto out; } @@ -355,16 +387,14 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) ret = -ENOMEM; assoclen = sizeof(*esph); - sglists = 1; seqhilen = 0; if (x->props.flags & XFRM_STATE_ESN) { - sglists += 2; seqhilen += sizeof(__be32); assoclen += seqhilen; } - tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen); + tmp = esp_alloc_tmp(aead, nfrags, seqhilen); if (!tmp) goto out; @@ -372,36 +402,39 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) seqhi = esp_tmp_seqhi(tmp); iv = esp_tmp_iv(aead, tmp, seqhilen); req = esp_tmp_req(aead, iv); - asg = esp_req_sg(aead, req); - sg = asg + sglists; + sg = esp_req_sg(aead, req); skb->ip_summed = CHECKSUM_NONE; esph = (struct ip_esp_hdr *)skb->data; - /* Get ivec. This can be wrong, check against another impls. */ - iv = esph->enc_data; - - sg_init_table(sg, nfrags); - skb_to_sgvec(skb, sg, sizeof(*esph) + crypto_aead_ivsize(aead), elen); + aead_request_set_callback(req, 0, esp_input_done, skb); + /* For ESN we move the header forward by 4 bytes to + * accomodate the high bits. We will move it back after + * decryption. + */ if ((x->props.flags & XFRM_STATE_ESN)) { - sg_init_table(asg, 3); - sg_set_buf(asg, &esph->spi, sizeof(__be32)); - *seqhi = XFRM_SKB_CB(skb)->seq.input.hi; - sg_set_buf(asg + 1, seqhi, seqhilen); - sg_set_buf(asg + 2, &esph->seq_no, sizeof(__be32)); - } else - sg_init_one(asg, esph, sizeof(*esph)); + esph = (void *)skb_push(skb, 4); + *seqhi = esph->spi; + esph->spi = esph->seq_no; + esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.input.hi); + aead_request_set_callback(req, 0, esp_input_done_esn, skb); + } - aead_request_set_callback(req, 0, esp_input_done, skb); - aead_request_set_crypt(req, sg, sg, elen, iv); - aead_request_set_assoc(req, asg, assoclen); + sg_init_table(sg, nfrags); + skb_to_sgvec(skb, sg, 0, skb->len); + + aead_request_set_crypt(req, sg, sg, elen + ivlen, iv); + aead_request_set_ad(req, assoclen); ret = crypto_aead_decrypt(req); if (ret == -EINPROGRESS) goto out; + if ((x->props.flags & XFRM_STATE_ESN)) + esp_input_restore_header(skb); + ret = esp_input_done2(skb, ret); out: @@ -461,10 +494,16 @@ static void esp6_destroy(struct xfrm_state *x) static int esp_init_aead(struct xfrm_state *x) { + char aead_name[CRYPTO_MAX_ALG_NAME]; struct crypto_aead *aead; int err; - aead = crypto_alloc_aead(x->aead->alg_name, 0, 0); + err = -ENAMETOOLONG; + if (snprintf(aead_name, CRYPTO_MAX_ALG_NAME, "%s(%s)", + x->geniv, x->aead->alg_name) >= CRYPTO_MAX_ALG_NAME) + goto error; + + aead = crypto_alloc_aead(aead_name, 0, 0); err = PTR_ERR(aead); if (IS_ERR(aead)) goto error; @@ -503,15 +542,19 @@ static int esp_init_authenc(struct xfrm_state *x) if ((x->props.flags & XFRM_STATE_ESN)) { if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME, - "authencesn(%s,%s)", + "%s%sauthencesn(%s,%s)%s", + x->geniv ?: "", x->geniv ? "(" : "", x->aalg ? x->aalg->alg_name : "digest_null", - x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME) + x->ealg->alg_name, + x->geniv ? ")" : "") >= CRYPTO_MAX_ALG_NAME) goto error; } else { if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME, - "authenc(%s,%s)", + "%s%sauthenc(%s,%s)%s", + x->geniv ?: "", x->geniv ? "(" : "", x->aalg ? x->aalg->alg_name : "digest_null", - x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME) + x->ealg->alg_name, + x->geniv ? ")" : "") >= CRYPTO_MAX_ALG_NAME) goto error; } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 2c2b5d51f..713d7434c 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -207,7 +207,7 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type, struct inet_peer *peer; peer = inet_getpeer_v6(net->ipv6.peers, - &rt->rt6i_dst.addr, 1); + &fl6->daddr, 1); res = inet_peer_xrlim_allow(peer, tmo); if (peer) inet_putpeer(peer); @@ -337,7 +337,7 @@ static struct dst_entry *icmpv6_route_lookup(struct net *net, * We won't send icmp if the destination is known * anycast. */ - if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) { + if (ipv6_anycast_destination(dst, &fl6->daddr)) { net_dbg_ratelimited("icmp6_send: acast source\n"); dst_release(dst); return ERR_PTR(-EINVAL); @@ -564,7 +564,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) if (!ipv6_unicast_destination(skb) && !(net->ipv6.sysctl.anycast_src_echo_reply && - ipv6_anycast_destination(skb))) + ipv6_anycast_destination(skb_dst(skb), saddr))) saddr = NULL; memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr)); diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 871641bc1..b4fd96de9 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -257,7 +257,7 @@ not_unique: return -EADDRNOTAVAIL; } -static inline u32 inet6_sk_port_offset(const struct sock *sk) +static u32 inet6_sk_port_offset(const struct sock *sk) { const struct inet_sock *inet = inet_sk(sk); @@ -269,7 +269,11 @@ static inline u32 inet6_sk_port_offset(const struct sock *sk) int inet6_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk) { - return __inet_hash_connect(death_row, sk, inet6_sk_port_offset(sk), + u32 port_offset = 0; + + if (!inet_sk(sk)->inet_num) + port_offset = inet6_sk_port_offset(sk); + return __inet_hash_connect(death_row, sk, port_offset, __inet6_check_established); } EXPORT_SYMBOL_GPL(inet6_hash_connect); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index bde57b113..548c6237b 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -154,10 +154,34 @@ static void node_free(struct fib6_node *fn) kmem_cache_free(fib6_node_kmem, fn); } +static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt) +{ + int cpu; + + if (!non_pcpu_rt->rt6i_pcpu) + return; + + for_each_possible_cpu(cpu) { + struct rt6_info **ppcpu_rt; + struct rt6_info *pcpu_rt; + + ppcpu_rt = per_cpu_ptr(non_pcpu_rt->rt6i_pcpu, cpu); + pcpu_rt = *ppcpu_rt; + if (pcpu_rt) { + dst_free(&pcpu_rt->dst); + *ppcpu_rt = NULL; + } + } + + non_pcpu_rt->rt6i_pcpu = NULL; +} + static void rt6_release(struct rt6_info *rt) { - if (atomic_dec_and_test(&rt->rt6i_ref)) + if (atomic_dec_and_test(&rt->rt6i_ref)) { + rt6_free_pcpu(rt); dst_free(&rt->dst); + } } static void fib6_link_table(struct net *net, struct fib6_table *tb) @@ -738,6 +762,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, rt6_clean_expires(iter); else rt6_set_expires(iter, rt->dst.expires); + iter->rt6i_pmtu = rt->rt6i_pmtu; return -EEXIST; } /* If we have the same destination and the same metric, diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index d49112501..1f9ebe3cb 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -595,6 +595,10 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) if (freq.flr_label & ~IPV6_FLOWLABEL_MASK) return -EINVAL; + if (net->ipv6.sysctl.flowlabel_state_ranges && + (freq.flr_label & IPV6_FLOWLABEL_STATELESS_FLAG)) + return -ERANGE; + fl = fl_create(net, sk, &freq, optval, optlen, &err); if (!fl) return err; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index a38d3ac0f..69f4f689f 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -361,6 +361,7 @@ static void ip6gre_tunnel_uninit(struct net_device *dev) struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id); ip6gre_tunnel_unlink(ign, t); + ip6_tnl_dst_reset(t); dev_put(dev); } diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index f2e464eba..57990c929 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -331,10 +331,10 @@ int ip6_mc_input(struct sk_buff *skb) if (offset < 0) goto out; - if (!ipv6_is_mld(skb, nexthdr, offset)) - goto out; + if (ipv6_is_mld(skb, nexthdr, offset)) + deliver = true; - deliver = true; + goto out; } /* unknown RA - process it normally */ } diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index e893cd186..08b62047c 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -292,8 +292,6 @@ static struct packet_offload ipv6_packet_offload __read_mostly = { static const struct net_offload sit_offload = { .callbacks = { .gso_segment = ipv6_gso_segment, - .gro_receive = ipv6_gro_receive, - .gro_complete = ipv6_gro_complete, }, }; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index bc09cb97b..d5f771666 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -105,7 +105,7 @@ static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb) } rcu_read_lock_bh(); - nexthop = rt6_nexthop((struct rt6_info *)dst); + nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr); neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop); if (unlikely(!neigh)) neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false); @@ -459,7 +459,7 @@ int ip6_forward(struct sk_buff *skb) else target = &hdr->daddr; - peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1); + peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1); /* Limit redirects both by destination (here) and by source (inside ndisc_send_redirect) @@ -551,7 +551,7 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb, struct frag_hdr *fh; unsigned int mtu, hlen, left, len; int hroom, troom; - __be32 frag_id = 0; + __be32 frag_id; int ptr, offset = 0, err = 0; u8 *prevhdr, nexthdr = 0; struct net *net = dev_net(skb_dst(skb)->dev); @@ -564,18 +564,17 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb, /* We must not fragment if the socket is set to force MTU discovery * or if the skb it not generated by a local socket. */ - if (unlikely(!skb->ignore_df && skb->len > mtu) || - (IP6CB(skb)->frag_max_size && - IP6CB(skb)->frag_max_size > mtu)) { - if (skb->sk && dst_allfrag(skb_dst(skb))) - sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK); + if (unlikely(!skb->ignore_df && skb->len > mtu)) + goto fail_toobig; - skb->dev = skb_dst(skb)->dev; - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_FRAGFAILS); - kfree_skb(skb); - return -EMSGSIZE; + if (IP6CB(skb)->frag_max_size) { + if (IP6CB(skb)->frag_max_size > mtu) + goto fail_toobig; + + /* don't send fragments larger than what we received */ + mtu = IP6CB(skb)->frag_max_size; + if (mtu < IPV6_MIN_MTU) + mtu = IPV6_MIN_MTU; } if (np && np->frag_size < mtu) { @@ -584,6 +583,9 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb, } mtu -= hlen + sizeof(struct frag_hdr); + frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr, + &ipv6_hdr(skb)->saddr); + if (skb_has_frag_list(skb)) { int first_len = skb_pagelen(skb); struct sk_buff *frag2; @@ -632,11 +634,10 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb, skb_reset_network_header(skb); memcpy(skb_network_header(skb), tmp_hdr, hlen); - ipv6_select_ident(net, fh, rt); fh->nexthdr = nexthdr; fh->reserved = 0; fh->frag_off = htons(IP6_MF); - frag_id = fh->identification; + fh->identification = frag_id; first_len = skb_pagelen(skb); skb->data_len = first_len - skb_headlen(skb); @@ -778,11 +779,7 @@ slow_path: */ fh->nexthdr = nexthdr; fh->reserved = 0; - if (!frag_id) { - ipv6_select_ident(net, fh, rt); - frag_id = fh->identification; - } else - fh->identification = frag_id; + fh->identification = frag_id; /* * Copy a block of the IP datagram. @@ -815,6 +812,14 @@ slow_path: consume_skb(skb); return err; +fail_toobig: + if (skb->sk && dst_allfrag(skb_dst(skb))) + sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK); + + skb->dev = skb_dst(skb)->dev; + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + err = -EMSGSIZE; + fail: IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); @@ -936,7 +941,8 @@ static int ip6_dst_lookup_tail(struct sock *sk, */ rt = (struct rt6_info *) *dst; rcu_read_lock_bh(); - n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt)); + n = __ipv6_neigh_lookup_noref(rt->dst.dev, + rt6_nexthop(rt, &fl6->daddr)); err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0; rcu_read_unlock_bh(); @@ -1060,11 +1066,10 @@ static inline int ip6_ufo_append_data(struct sock *sk, int odd, struct sk_buff *skb), void *from, int length, int hh_len, int fragheaderlen, int transhdrlen, int mtu, unsigned int flags, - struct rt6_info *rt) + const struct flowi6 *fl6) { struct sk_buff *skb; - struct frag_hdr fhdr; int err; /* There is support for UDP large send offload by network @@ -1106,8 +1111,9 @@ static inline int ip6_ufo_append_data(struct sock *sk, skb_shinfo(skb)->gso_size = (mtu - fragheaderlen - sizeof(struct frag_hdr)) & ~7; skb_shinfo(skb)->gso_type = SKB_GSO_UDP; - ipv6_select_ident(sock_net(sk), &fhdr, rt); - skb_shinfo(skb)->ip6_frag_id = fhdr.identification; + skb_shinfo(skb)->ip6_frag_id = ipv6_select_ident(sock_net(sk), + &fl6->daddr, + &fl6->saddr); append: return skb_append_datato_frags(sk, skb, getfrag, from, @@ -1332,7 +1338,7 @@ emsgsize: (sk->sk_type == SOCK_DGRAM)) { err = ip6_ufo_append_data(sk, queue, getfrag, from, length, hh_len, fragheaderlen, - transhdrlen, mtu, flags, rt); + transhdrlen, mtu, flags, fl6); if (err) goto error; return 0; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 5cafd92c2..2e67b6601 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -151,7 +151,7 @@ EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset); void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst) { struct rt6_info *rt = (struct rt6_info *) dst; - t->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; + t->dst_cookie = rt6_get_cookie(rt); dst_release(t->dst_cache); t->dst_cache = dst; } diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c index bba8903e8..e1a1136bd 100644 --- a/net/ipv6/ip6_udp_tunnel.c +++ b/net/ipv6/ip6_udp_tunnel.c @@ -19,12 +19,10 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg, int err; struct socket *sock = NULL; - err = sock_create_kern(AF_INET6, SOCK_DGRAM, 0, &sock); + err = sock_create_kern(net, AF_INET6, SOCK_DGRAM, 0, &sock); if (err < 0) goto error; - sk_change_net(sock->sk, net); - udp6_addr.sin6_family = AF_INET6; memcpy(&udp6_addr.sin6_addr, &cfg->local_ip6, sizeof(udp6_addr.sin6_addr)); @@ -55,7 +53,7 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg, error: if (sock) { kernel_sock_shutdown(sock, SHUT_RDWR); - sk_release_kernel(sock->sk); + sock_release(sock); } *sockp = NULL; return err; diff --git a/net/ipv6/mcast_snoop.c b/net/ipv6/mcast_snoop.c new file mode 100644 index 000000000..9405b04ee --- /dev/null +++ b/net/ipv6/mcast_snoop.c @@ -0,0 +1,216 @@ +/* Copyright (C) 2010: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org> + * Copyright (C) 2015: Linus Lüssing <linus.luessing@c0d3.blue> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + * + * + * Based on the MLD support added to br_multicast.c by YOSHIFUJI Hideaki. + */ + +#include <linux/skbuff.h> +#include <net/ipv6.h> +#include <net/mld.h> +#include <net/addrconf.h> +#include <net/ip6_checksum.h> + +static int ipv6_mc_check_ip6hdr(struct sk_buff *skb) +{ + const struct ipv6hdr *ip6h; + unsigned int len; + unsigned int offset = skb_network_offset(skb) + sizeof(*ip6h); + + if (!pskb_may_pull(skb, offset)) + return -EINVAL; + + ip6h = ipv6_hdr(skb); + + if (ip6h->version != 6) + return -EINVAL; + + len = offset + ntohs(ip6h->payload_len); + if (skb->len < len || len <= offset) + return -EINVAL; + + return 0; +} + +static int ipv6_mc_check_exthdrs(struct sk_buff *skb) +{ + const struct ipv6hdr *ip6h; + int offset; + u8 nexthdr; + __be16 frag_off; + + ip6h = ipv6_hdr(skb); + + if (ip6h->nexthdr != IPPROTO_HOPOPTS) + return -ENOMSG; + + nexthdr = ip6h->nexthdr; + offset = skb_network_offset(skb) + sizeof(*ip6h); + offset = ipv6_skip_exthdr(skb, offset, &nexthdr, &frag_off); + + if (offset < 0) + return -EINVAL; + + if (nexthdr != IPPROTO_ICMPV6) + return -ENOMSG; + + skb_set_transport_header(skb, offset); + + return 0; +} + +static int ipv6_mc_check_mld_reportv2(struct sk_buff *skb) +{ + unsigned int len = skb_transport_offset(skb); + + len += sizeof(struct mld2_report); + + return pskb_may_pull(skb, len) ? 0 : -EINVAL; +} + +static int ipv6_mc_check_mld_query(struct sk_buff *skb) +{ + struct mld_msg *mld; + unsigned int len = skb_transport_offset(skb); + + /* RFC2710+RFC3810 (MLDv1+MLDv2) require link-local source addresses */ + if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) + return -EINVAL; + + len += sizeof(struct mld_msg); + if (skb->len < len) + return -EINVAL; + + /* MLDv1? */ + if (skb->len != len) { + /* or MLDv2? */ + len += sizeof(struct mld2_query) - sizeof(struct mld_msg); + if (skb->len < len || !pskb_may_pull(skb, len)) + return -EINVAL; + } + + mld = (struct mld_msg *)skb_transport_header(skb); + + /* RFC2710+RFC3810 (MLDv1+MLDv2) require the multicast link layer + * all-nodes destination address (ff02::1) for general queries + */ + if (ipv6_addr_any(&mld->mld_mca) && + !ipv6_addr_is_ll_all_nodes(&ipv6_hdr(skb)->daddr)) + return -EINVAL; + + return 0; +} + +static int ipv6_mc_check_mld_msg(struct sk_buff *skb) +{ + struct mld_msg *mld = (struct mld_msg *)skb_transport_header(skb); + + switch (mld->mld_type) { + case ICMPV6_MGM_REDUCTION: + case ICMPV6_MGM_REPORT: + /* fall through */ + return 0; + case ICMPV6_MLD2_REPORT: + return ipv6_mc_check_mld_reportv2(skb); + case ICMPV6_MGM_QUERY: + return ipv6_mc_check_mld_query(skb); + default: + return -ENOMSG; + } +} + +static inline __sum16 ipv6_mc_validate_checksum(struct sk_buff *skb) +{ + return skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo); +} + +static int __ipv6_mc_check_mld(struct sk_buff *skb, + struct sk_buff **skb_trimmed) + +{ + struct sk_buff *skb_chk = NULL; + unsigned int transport_len; + unsigned int len = skb_transport_offset(skb) + sizeof(struct mld_msg); + int ret = -EINVAL; + + transport_len = ntohs(ipv6_hdr(skb)->payload_len); + transport_len -= skb_transport_offset(skb) - sizeof(struct ipv6hdr); + + skb_chk = skb_checksum_trimmed(skb, transport_len, + ipv6_mc_validate_checksum); + if (!skb_chk) + goto err; + + if (!pskb_may_pull(skb_chk, len)) + goto err; + + ret = ipv6_mc_check_mld_msg(skb_chk); + if (ret) + goto err; + + if (skb_trimmed) + *skb_trimmed = skb_chk; + /* free now unneeded clone */ + else if (skb_chk != skb) + kfree_skb(skb_chk); + + ret = 0; + +err: + if (ret && skb_chk && skb_chk != skb) + kfree_skb(skb_chk); + + return ret; +} + +/** + * ipv6_mc_check_mld - checks whether this is a sane MLD packet + * @skb: the skb to validate + * @skb_trimmed: to store an skb pointer trimmed to IPv6 packet tail (optional) + * + * Checks whether an IPv6 packet is a valid MLD packet. If so sets + * skb transport header accordingly and returns zero. + * + * -EINVAL: A broken packet was detected, i.e. it violates some internet + * standard + * -ENOMSG: IP header validation succeeded but it is not an MLD packet. + * -ENOMEM: A memory allocation failure happened. + * + * Optionally, an skb pointer might be provided via skb_trimmed (or set it + * to NULL): After parsing an MLD packet successfully it will point to + * an skb which has its tail aligned to the IP packet end. This might + * either be the originally provided skb or a trimmed, cloned version if + * the skb frame had data beyond the IP packet. A cloned skb allows us + * to leave the original skb and its full frame unchanged (which might be + * desirable for layer 2 frame jugglers). + * + * Caller needs to set the skb network header and free any returned skb if it + * differs from the provided skb. + */ +int ipv6_mc_check_mld(struct sk_buff *skb, struct sk_buff **skb_trimmed) +{ + int ret; + + ret = ipv6_mc_check_ip6hdr(skb); + if (ret < 0) + return ret; + + ret = ipv6_mc_check_exthdrs(skb); + if (ret < 0) + return ret; + + return __ipv6_mc_check_mld(skb, skb_trimmed); +} +EXPORT_SYMBOL(ipv6_mc_check_mld); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 96f153c08..c53331cfe 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1506,7 +1506,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) "Redirect: destination is not a neighbour\n"); goto release; } - peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1); + peer = inet_getpeer_v6(net->ipv6.peers, &ipv6_hdr(skb)->saddr, 1); ret = inet_peer_xrlim_allow(peer, 1*HZ); if (peer) inet_putpeer(peer); @@ -1650,6 +1650,7 @@ int ndisc_rcv(struct sk_buff *skb) static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct netdev_notifier_change_info *change_info; struct net *net = dev_net(dev); struct inet6_dev *idev; @@ -1664,6 +1665,11 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, ndisc_send_unsol_na(dev); in6_dev_put(idev); break; + case NETDEV_CHANGE: + change_info = ptr; + if (change_info->flags_changed & IFF_NOARP) + neigh_changeaddr(&nd_tbl, dev); + break; case NETDEV_DOWN: neigh_ifdown(&nd_tbl, dev); fib6_run_gc(0, net, false); diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index d958718b5..b4de08a83 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -191,6 +191,8 @@ static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook, static const struct nf_ipv6_ops ipv6ops = { .chk_addr = ipv6_chk_addr, + .route_input = ip6_route_input, + .fragment = ip6_fragment }; static const struct nf_afinfo nf_ip6_afinfo = { diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index ca6998345..b552cf0d6 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -186,7 +186,8 @@ config IP6_NF_MATCH_MH config IP6_NF_MATCH_RPFILTER tristate '"rpfilter" reverse path filter match support' - depends on NETFILTER_ADVANCED && (IP6_NF_MANGLE || IP6_NF_RAW) + depends on NETFILTER_ADVANCED + depends on IP6_NF_MANGLE || IP6_NF_RAW ---help--- This option allows you to match packets whose replies would go out via the interface the packet came in. diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 62f5b0d0b..3c35ced39 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -283,15 +283,13 @@ static void trace_packet(const struct sk_buff *skb, const struct xt_table_info *private, const struct ip6t_entry *e) { - const void *table_base; const struct ip6t_entry *root; const char *hookname, *chainname, *comment; const struct ip6t_entry *iter; unsigned int rulenum = 0; struct net *net = dev_net(in ? in : out); - table_base = private->entries[smp_processor_id()]; - root = get_entry(table_base, private->hook_entry[hook]); + root = get_entry(private->entries, private->hook_entry[hook]); hookname = chainname = hooknames[hook]; comment = comments[NF_IP6_TRACE_COMMENT_RULE]; @@ -357,7 +355,7 @@ ip6t_do_table(struct sk_buff *skb, */ smp_read_barrier_depends(); cpu = smp_processor_id(); - table_base = private->entries[cpu]; + table_base = private->entries; jumpstack = (struct ip6t_entry **)private->jumpstack[cpu]; stackptr = per_cpu_ptr(private->stackptr, cpu); origptr = *stackptr; @@ -367,6 +365,7 @@ ip6t_do_table(struct sk_buff *skb, do { const struct xt_entry_target *t; const struct xt_entry_match *ematch; + struct xt_counters *counter; IP_NF_ASSERT(e); acpar.thoff = 0; @@ -384,7 +383,8 @@ ip6t_do_table(struct sk_buff *skb, goto no_match; } - ADD_COUNTER(e->counters, skb->len, 1); + counter = xt_get_this_cpu_counter(&e->counters); + ADD_COUNTER(*counter, skb->len, 1); t = ip6t_get_target_c(e); IP_NF_ASSERT(t->u.kernel.target); @@ -679,6 +679,10 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, if (ret) return ret; + e->counters.pcnt = xt_percpu_counter_alloc(); + if (IS_ERR_VALUE(e->counters.pcnt)) + return -ENOMEM; + j = 0; mtpar.net = net; mtpar.table = name; @@ -714,6 +718,9 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, break; cleanup_match(ematch, net); } + + xt_percpu_counter_free(e->counters.pcnt); + return ret; } @@ -797,6 +804,8 @@ static void cleanup_entry(struct ip6t_entry *e, struct net *net) if (par.target->destroy != NULL) par.target->destroy(&par); module_put(par.target->me); + + xt_percpu_counter_free(e->counters.pcnt); } /* Checks and translates the user-supplied table segment (held in @@ -879,12 +888,6 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, return ret; } - /* And one copy for every other CPU */ - for_each_possible_cpu(i) { - if (newinfo->entries[i] && newinfo->entries[i] != entry0) - memcpy(newinfo->entries[i], entry0, newinfo->size); - } - return ret; } @@ -900,14 +903,16 @@ get_counters(const struct xt_table_info *t, seqcount_t *s = &per_cpu(xt_recseq, cpu); i = 0; - xt_entry_foreach(iter, t->entries[cpu], t->size) { + xt_entry_foreach(iter, t->entries, t->size) { + struct xt_counters *tmp; u64 bcnt, pcnt; unsigned int start; + tmp = xt_get_per_cpu_counter(&iter->counters, cpu); do { start = read_seqcount_begin(s); - bcnt = iter->counters.bcnt; - pcnt = iter->counters.pcnt; + bcnt = tmp->bcnt; + pcnt = tmp->pcnt; } while (read_seqcount_retry(s, start)); ADD_COUNTER(counters[i], bcnt, pcnt); @@ -952,11 +957,7 @@ copy_entries_to_user(unsigned int total_size, if (IS_ERR(counters)) return PTR_ERR(counters); - /* choose the copy that is on our node/cpu, ... - * This choice is lazy (because current thread is - * allowed to migrate to another cpu) - */ - loc_cpu_entry = private->entries[raw_smp_processor_id()]; + loc_cpu_entry = private->entries; if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) { ret = -EFAULT; goto free_counters; @@ -1064,16 +1065,16 @@ static int compat_table_info(const struct xt_table_info *info, struct xt_table_info *newinfo) { struct ip6t_entry *iter; - void *loc_cpu_entry; + const void *loc_cpu_entry; int ret; if (!newinfo || !info) return -EINVAL; - /* we dont care about newinfo->entries[] */ + /* we dont care about newinfo->entries */ memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); newinfo->initial_entries = 0; - loc_cpu_entry = info->entries[raw_smp_processor_id()]; + loc_cpu_entry = info->entries; xt_compat_init_offsets(AF_INET6, info->number); xt_entry_foreach(iter, loc_cpu_entry, info->size) { ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); @@ -1194,7 +1195,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, struct xt_table *t; struct xt_table_info *oldinfo; struct xt_counters *counters; - const void *loc_cpu_old_entry; struct ip6t_entry *iter; ret = 0; @@ -1237,8 +1237,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, get_counters(oldinfo, counters); /* Decrease module usage counts and free resource */ - loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; - xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size) + xt_entry_foreach(iter, oldinfo->entries, oldinfo->size) cleanup_entry(iter, net); xt_free_table_info(oldinfo); @@ -1284,8 +1283,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len) if (!newinfo) return -ENOMEM; - /* choose the copy that is on our node/cpu */ - loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + loc_cpu_entry = newinfo->entries; if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), tmp.size) != 0) { ret = -EFAULT; @@ -1316,7 +1314,7 @@ static int do_add_counters(struct net *net, const void __user *user, unsigned int len, int compat) { - unsigned int i, curcpu; + unsigned int i; struct xt_counters_info tmp; struct xt_counters *paddc; unsigned int num_counters; @@ -1326,7 +1324,6 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, struct xt_table *t; const struct xt_table_info *private; int ret = 0; - const void *loc_cpu_entry; struct ip6t_entry *iter; unsigned int addend; #ifdef CONFIG_COMPAT @@ -1374,7 +1371,6 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, goto free; } - local_bh_disable(); private = t->private; if (private->number != num_counters) { @@ -1383,16 +1379,15 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, } i = 0; - /* Choose the copy that is on our node */ - curcpu = smp_processor_id(); addend = xt_write_recseq_begin(); - loc_cpu_entry = private->entries[curcpu]; - xt_entry_foreach(iter, loc_cpu_entry, private->size) { - ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt); + xt_entry_foreach(iter, private->entries, private->size) { + struct xt_counters *tmp; + + tmp = xt_get_this_cpu_counter(&iter->counters); + ADD_COUNTER(*tmp, paddc[i].bcnt, paddc[i].pcnt); ++i; } xt_write_recseq_end(addend); - unlock_up_free: local_bh_enable(); xt_table_unlock(t); @@ -1459,7 +1454,6 @@ static int compat_find_calc_match(struct xt_entry_match *m, const char *name, const struct ip6t_ip6 *ipv6, - unsigned int hookmask, int *size) { struct xt_match *match; @@ -1528,8 +1522,7 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, entry_offset = (void *)e - (void *)base; j = 0; xt_ematch_foreach(ematch, e) { - ret = compat_find_calc_match(ematch, name, - &e->ipv6, e->comefrom, &off); + ret = compat_find_calc_match(ematch, name, &e->ipv6, &off); if (ret != 0) goto release_matches; ++j; @@ -1623,6 +1616,9 @@ static int compat_check_entry(struct ip6t_entry *e, struct net *net, struct xt_mtchk_param mtpar; struct xt_entry_match *ematch; + e->counters.pcnt = xt_percpu_counter_alloc(); + if (IS_ERR_VALUE(e->counters.pcnt)) + return -ENOMEM; j = 0; mtpar.net = net; mtpar.table = name; @@ -1647,6 +1643,9 @@ static int compat_check_entry(struct ip6t_entry *e, struct net *net, break; cleanup_match(ematch, net); } + + xt_percpu_counter_free(e->counters.pcnt); + return ret; } @@ -1731,7 +1730,7 @@ translate_compat_table(struct net *net, newinfo->hook_entry[i] = info->hook_entry[i]; newinfo->underflow[i] = info->underflow[i]; } - entry1 = newinfo->entries[raw_smp_processor_id()]; + entry1 = newinfo->entries; pos = entry1; size = total_size; xt_entry_foreach(iter0, entry0, total_size) { @@ -1783,11 +1782,6 @@ translate_compat_table(struct net *net, return ret; } - /* And one copy for every other CPU */ - for_each_possible_cpu(i) - if (newinfo->entries[i] && newinfo->entries[i] != entry1) - memcpy(newinfo->entries[i], entry1, newinfo->size); - *pinfo = newinfo; *pentry0 = entry1; xt_free_table_info(info); @@ -1834,8 +1828,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) if (!newinfo) return -ENOMEM; - /* choose the copy that is on our node/cpu */ - loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + loc_cpu_entry = newinfo->entries; if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), tmp.size) != 0) { ret = -EFAULT; @@ -1906,7 +1899,6 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, void __user *pos; unsigned int size; int ret = 0; - const void *loc_cpu_entry; unsigned int i = 0; struct ip6t_entry *iter; @@ -1914,14 +1906,9 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, if (IS_ERR(counters)) return PTR_ERR(counters); - /* choose the copy that is on our node/cpu, ... - * This choice is lazy (because current thread is - * allowed to migrate to another cpu) - */ - loc_cpu_entry = private->entries[raw_smp_processor_id()]; pos = userptr; size = total_size; - xt_entry_foreach(iter, loc_cpu_entry, total_size) { + xt_entry_foreach(iter, private->entries, total_size) { ret = compat_copy_entry_to_user(iter, &pos, &size, counters, i++); if (ret != 0) @@ -2096,8 +2083,7 @@ struct xt_table *ip6t_register_table(struct net *net, goto out; } - /* choose the copy on our node/cpu, but dont care about preemption */ - loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + loc_cpu_entry = newinfo->entries; memcpy(loc_cpu_entry, repl->entries, repl->size); ret = translate_table(net, newinfo, loc_cpu_entry, repl); @@ -2127,7 +2113,7 @@ void ip6t_unregister_table(struct net *net, struct xt_table *table) private = xt_unregister_table(table); /* Decrease module usage counts and free resources */ - loc_cpu_entry = private->entries[raw_smp_processor_id()]; + loc_cpu_entry = private->entries; xt_entry_foreach(iter, loc_cpu_entry, private->size) cleanup_entry(iter, net); if (private->number > private->initial_entries) diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index 6edb7b106..ebbb754c2 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -37,12 +37,13 @@ synproxy_build_ip(struct sk_buff *skb, const struct in6_addr *saddr, } static void -synproxy_send_tcp(const struct sk_buff *skb, struct sk_buff *nskb, +synproxy_send_tcp(const struct synproxy_net *snet, + const struct sk_buff *skb, struct sk_buff *nskb, struct nf_conntrack *nfct, enum ip_conntrack_info ctinfo, struct ipv6hdr *niph, struct tcphdr *nth, unsigned int tcp_hdr_size) { - struct net *net = nf_ct_net((struct nf_conn *)nfct); + struct net *net = nf_ct_net(snet->tmpl); struct dst_entry *dst; struct flowi6 fl6; @@ -83,7 +84,8 @@ free_nskb: } static void -synproxy_send_client_synack(const struct sk_buff *skb, const struct tcphdr *th, +synproxy_send_client_synack(const struct synproxy_net *snet, + const struct sk_buff *skb, const struct tcphdr *th, const struct synproxy_options *opts) { struct sk_buff *nskb; @@ -119,7 +121,7 @@ synproxy_send_client_synack(const struct sk_buff *skb, const struct tcphdr *th, synproxy_build_options(nth, opts); - synproxy_send_tcp(skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, + synproxy_send_tcp(snet, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, niph, nth, tcp_hdr_size); } @@ -163,7 +165,7 @@ synproxy_send_server_syn(const struct synproxy_net *snet, synproxy_build_options(nth, opts); - synproxy_send_tcp(skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW, + synproxy_send_tcp(snet, skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW, niph, nth, tcp_hdr_size); } @@ -203,7 +205,7 @@ synproxy_send_server_ack(const struct synproxy_net *snet, synproxy_build_options(nth, opts); - synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size); + synproxy_send_tcp(snet, skb, nskb, NULL, 0, niph, nth, tcp_hdr_size); } static void @@ -241,7 +243,8 @@ synproxy_send_client_ack(const struct synproxy_net *snet, synproxy_build_options(nth, opts); - synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size); + synproxy_send_tcp(snet, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, + niph, nth, tcp_hdr_size); } static bool @@ -301,7 +304,7 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par) XT_SYNPROXY_OPT_SACK_PERM | XT_SYNPROXY_OPT_ECN); - synproxy_send_client_synack(skb, th, &opts); + synproxy_send_client_synack(snet, skb, th, &opts); return NF_DROP; } else if (th->ack && !(th->fin || th->rst || th->syn)) { diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 6f187c8d8..6d0249817 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -348,7 +348,7 @@ found: fq->ecn |= ecn; if (payload_len > fq->q.max_size) fq->q.max_size = payload_len; - add_frag_mem_limit(&fq->q, skb->truesize); + add_frag_mem_limit(fq->q.net, skb->truesize); /* The first fragment. * nhoffset is obtained from the first fragment, of course. @@ -430,7 +430,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) clone->ip_summed = head->ip_summed; NFCT_FRAG6_CB(clone)->orig = NULL; - add_frag_mem_limit(&fq->q, clone->truesize); + add_frag_mem_limit(fq->q.net, clone->truesize); } /* We have to remove fragment header from datagram and to relocate @@ -454,7 +454,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; } - sub_frag_mem_limit(&fq->q, head->truesize); + sub_frag_mem_limit(fq->q.net, head->truesize); head->ignore_df = 1; head->next = NULL; diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 85892af57..928a0fb0b 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -8,9 +8,11 @@ #include <net/ip6_fib.h> #include <net/addrconf.h> #include <net/secure_seq.h> +#include <linux/netfilter.h> static u32 __ipv6_select_ident(struct net *net, u32 hashrnd, - struct in6_addr *dst, struct in6_addr *src) + const struct in6_addr *dst, + const struct in6_addr *src) { u32 hash, id; @@ -60,17 +62,17 @@ void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb) } EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident); -void ipv6_select_ident(struct net *net, struct frag_hdr *fhdr, - struct rt6_info *rt) +__be32 ipv6_select_ident(struct net *net, + const struct in6_addr *daddr, + const struct in6_addr *saddr) { static u32 ip6_idents_hashrnd __read_mostly; u32 id; net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd)); - id = __ipv6_select_ident(net, ip6_idents_hashrnd, &rt->rt6i_dst.addr, - &rt->rt6i_src.addr); - fhdr->identification = htonl(id); + id = __ipv6_select_ident(net, ip6_idents_hashrnd, daddr, saddr); + return htonl(id); } EXPORT_SYMBOL(ipv6_select_ident); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 8072bd413..ca4700cb2 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -865,6 +865,9 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) fl6.flowi6_oif = np->ucast_oif; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); + if (inet->hdrincl) + fl6.flowi6_flags |= FLOWI_FLAG_KNOWN_NH; + dst = ip6_dst_lookup_flow(sk, &fl6, final_p); if (IS_ERR(dst)) { err = PTR_ERR(dst); @@ -1324,13 +1327,7 @@ static struct inet_protosw rawv6_protosw = { int __init rawv6_init(void) { - int ret; - - ret = inet6_register_protosw(&rawv6_protosw); - if (ret) - goto out; -out: - return ret; + return inet6_register_protosw(&rawv6_protosw); } void rawv6_exit(void) diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 8ffa2c8cc..f1159bb76 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -144,7 +144,7 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq, IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); - if (fq->q.flags & INET_FRAG_EVICTED) + if (inet_frag_evicting(&fq->q)) goto out_rcu_unlock; IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); @@ -330,7 +330,7 @@ found: fq->q.stamp = skb->tstamp; fq->q.meat += skb->len; fq->ecn |= ecn; - add_frag_mem_limit(&fq->q, skb->truesize); + add_frag_mem_limit(fq->q.net, skb->truesize); /* The first fragment. * nhoffset is obtained from the first fragment, of course. @@ -443,7 +443,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, head->len -= clone->len; clone->csum = 0; clone->ip_summed = head->ip_summed; - add_frag_mem_limit(&fq->q, clone->truesize); + add_frag_mem_limit(fq->q.net, clone->truesize); } /* We have to remove fragment header from datagram and to relocate @@ -481,7 +481,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, } fp = next; } - sub_frag_mem_limit(&fq->q, sum_truesize); + sub_frag_mem_limit(fq->q.net, sum_truesize); head->next = NULL; head->dev = dev; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c73ae5039..d15586490 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -72,8 +72,7 @@ enum rt6_nud_state { RT6_NUD_SUCCEED = 1 }; -static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, - const struct in6_addr *dest); +static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort); static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); static unsigned int ip6_default_advmss(const struct dst_entry *dst); static unsigned int ip6_mtu(const struct dst_entry *dst); @@ -92,6 +91,7 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu); static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb); +static void rt6_dst_from_metrics_check(struct rt6_info *rt); static int rt6_score_route(struct rt6_info *rt, int oif, int strict); #ifdef CONFIG_IPV6_ROUTE_INFO @@ -104,65 +104,82 @@ static struct rt6_info *rt6_get_route_info(struct net *net, const struct in6_addr *gwaddr, int ifindex); #endif -static void rt6_bind_peer(struct rt6_info *rt, int create) +struct uncached_list { + spinlock_t lock; + struct list_head head; +}; + +static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list); + +static void rt6_uncached_list_add(struct rt6_info *rt) { - struct inet_peer_base *base; - struct inet_peer *peer; + struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list); - base = inetpeer_base_ptr(rt->_rt6i_peer); - if (!base) - return; + rt->dst.flags |= DST_NOCACHE; + rt->rt6i_uncached_list = ul; - peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create); - if (peer) { - if (!rt6_set_peer(rt, peer)) - inet_putpeer(peer); + spin_lock_bh(&ul->lock); + list_add_tail(&rt->rt6i_uncached, &ul->head); + spin_unlock_bh(&ul->lock); +} + +static void rt6_uncached_list_del(struct rt6_info *rt) +{ + if (!list_empty(&rt->rt6i_uncached)) { + struct uncached_list *ul = rt->rt6i_uncached_list; + + spin_lock_bh(&ul->lock); + list_del(&rt->rt6i_uncached); + spin_unlock_bh(&ul->lock); } } -static struct inet_peer *__rt6_get_peer(struct rt6_info *rt, int create) +static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev) { - if (rt6_has_peer(rt)) - return rt6_peer_ptr(rt); + struct net_device *loopback_dev = net->loopback_dev; + int cpu; + + for_each_possible_cpu(cpu) { + struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu); + struct rt6_info *rt; + + spin_lock_bh(&ul->lock); + list_for_each_entry(rt, &ul->head, rt6i_uncached) { + struct inet6_dev *rt_idev = rt->rt6i_idev; + struct net_device *rt_dev = rt->dst.dev; - rt6_bind_peer(rt, create); - return (rt6_has_peer(rt) ? rt6_peer_ptr(rt) : NULL); + if (rt_idev && (rt_idev->dev == dev || !dev) && + rt_idev->dev != loopback_dev) { + rt->rt6i_idev = in6_dev_get(loopback_dev); + in6_dev_put(rt_idev); + } + + if (rt_dev && (rt_dev == dev || !dev) && + rt_dev != loopback_dev) { + rt->dst.dev = loopback_dev; + dev_hold(rt->dst.dev); + dev_put(rt_dev); + } + } + spin_unlock_bh(&ul->lock); + } } -static struct inet_peer *rt6_get_peer_create(struct rt6_info *rt) +static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt) { - return __rt6_get_peer(rt, 1); + return dst_metrics_write_ptr(rt->dst.from); } static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) { - struct rt6_info *rt = (struct rt6_info *) dst; - struct inet_peer *peer; - u32 *p = NULL; + struct rt6_info *rt = (struct rt6_info *)dst; - if (!(rt->dst.flags & DST_HOST)) + if (rt->rt6i_flags & RTF_PCPU) + return rt6_pcpu_cow_metrics(rt); + else if (rt->rt6i_flags & RTF_CACHE) + return NULL; + else return dst_cow_metrics_generic(dst, old); - - peer = rt6_get_peer_create(rt); - if (peer) { - u32 *old_p = __DST_METRICS_PTR(old); - unsigned long prev, new; - - p = peer->metrics; - if (inet_metrics_new(peer) || - (old & DST_METRICS_FORCE_OVERWRITE)) - memcpy(p, old_p, sizeof(u32) * RTAX_MAX); - - new = (unsigned long) p; - prev = cmpxchg(&dst->_metrics, old, new); - - if (prev != old) { - p = __DST_METRICS_PTR(prev); - if (prev & DST_METRICS_READ_ONLY) - p = NULL; - } - } - return p; } static inline const void *choose_neigh_daddr(struct rt6_info *rt, @@ -299,10 +316,9 @@ static const struct rt6_info ip6_blk_hole_entry_template = { #endif /* allocate dst with ip6_dst_ops */ -static inline struct rt6_info *ip6_dst_alloc(struct net *net, - struct net_device *dev, - int flags, - struct fib6_table *table) +static struct rt6_info *__ip6_dst_alloc(struct net *net, + struct net_device *dev, + int flags) { struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0, DST_OBSOLETE_FORCE_CHK, flags); @@ -311,21 +327,50 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net, struct dst_entry *dst = &rt->dst; memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst)); - rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers); INIT_LIST_HEAD(&rt->rt6i_siblings); + INIT_LIST_HEAD(&rt->rt6i_uncached); + } + return rt; +} + +static struct rt6_info *ip6_dst_alloc(struct net *net, + struct net_device *dev, + int flags) +{ + struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags); + + if (rt) { + rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC); + if (rt->rt6i_pcpu) { + int cpu; + + for_each_possible_cpu(cpu) { + struct rt6_info **p; + + p = per_cpu_ptr(rt->rt6i_pcpu, cpu); + /* no one shares rt */ + *p = NULL; + } + } else { + dst_destroy((struct dst_entry *)rt); + return NULL; + } } + return rt; } static void ip6_dst_destroy(struct dst_entry *dst) { struct rt6_info *rt = (struct rt6_info *)dst; - struct inet6_dev *idev = rt->rt6i_idev; struct dst_entry *from = dst->from; + struct inet6_dev *idev; - if (!(rt->dst.flags & DST_HOST)) - dst_destroy_metrics_generic(dst); + dst_destroy_metrics_generic(dst); + free_percpu(rt->rt6i_pcpu); + rt6_uncached_list_del(rt); + idev = rt->rt6i_idev; if (idev) { rt->rt6i_idev = NULL; in6_dev_put(idev); @@ -333,11 +378,6 @@ static void ip6_dst_destroy(struct dst_entry *dst) dst->from = NULL; dst_release(from); - - if (rt6_has_peer(rt)) { - struct inet_peer *peer = rt6_peer_ptr(rt); - inet_putpeer(peer); - } } static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, @@ -652,15 +692,33 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn, u32 metric, int oif, int strict, bool *do_rr) { - struct rt6_info *rt, *match; + struct rt6_info *rt, *match, *cont; int mpri = -1; match = NULL; - for (rt = rr_head; rt && rt->rt6i_metric == metric; - rt = rt->dst.rt6_next) + cont = NULL; + for (rt = rr_head; rt; rt = rt->dst.rt6_next) { + if (rt->rt6i_metric != metric) { + cont = rt; + break; + } + + match = find_match(rt, oif, strict, &mpri, match, do_rr); + } + + for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) { + if (rt->rt6i_metric != metric) { + cont = rt; + break; + } + match = find_match(rt, oif, strict, &mpri, match, do_rr); - for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric; - rt = rt->dst.rt6_next) + } + + if (match || !cont) + return match; + + for (rt = cont; rt; rt = rt->dst.rt6_next) match = find_match(rt, oif, strict, &mpri, match, do_rr); return match; @@ -694,6 +752,11 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict) return match ? match : net->ipv6.ip6_null_entry; } +static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt) +{ + return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)); +} + #ifdef CONFIG_IPV6_ROUTE_INFO int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, const struct in6_addr *gwaddr) @@ -872,9 +935,9 @@ int ip6_ins_rt(struct rt6_info *rt) return __ip6_ins_rt(rt, &info, &mxc); } -static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, - const struct in6_addr *daddr, - const struct in6_addr *saddr) +static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort, + const struct in6_addr *daddr, + const struct in6_addr *saddr) { struct rt6_info *rt; @@ -882,15 +945,25 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, * Clone the route. */ - rt = ip6_rt_copy(ort, daddr); + if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU)) + ort = (struct rt6_info *)ort->dst.from; - if (rt) { + rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0); + + if (!rt) + return NULL; + + ip6_rt_copy_init(rt, ort); + rt->rt6i_flags |= RTF_CACHE; + rt->rt6i_metric = 0; + rt->dst.flags |= DST_HOST; + rt->rt6i_dst.addr = *daddr; + rt->rt6i_dst.plen = 128; + + if (!rt6_is_gw_or_nonexthop(ort)) { if (ort->rt6i_dst.plen != 128 && ipv6_addr_equal(&ort->rt6i_dst.addr, daddr)) rt->rt6i_flags |= RTF_ANYCAST; - - rt->rt6i_flags |= RTF_CACHE; - #ifdef CONFIG_IPV6_SUBTREES if (rt->rt6i_src.plen && saddr) { rt->rt6i_src.addr = *saddr; @@ -902,30 +975,85 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, return rt; } -static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, - const struct in6_addr *daddr) +static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt) { - struct rt6_info *rt = ip6_rt_copy(ort, daddr); + struct rt6_info *pcpu_rt; - if (rt) - rt->rt6i_flags |= RTF_CACHE; - return rt; + pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev), + rt->dst.dev, rt->dst.flags); + + if (!pcpu_rt) + return NULL; + ip6_rt_copy_init(pcpu_rt, rt); + pcpu_rt->rt6i_protocol = rt->rt6i_protocol; + pcpu_rt->rt6i_flags |= RTF_PCPU; + return pcpu_rt; +} + +/* It should be called with read_lock_bh(&tb6_lock) acquired */ +static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt) +{ + struct rt6_info *pcpu_rt, **p; + + p = this_cpu_ptr(rt->rt6i_pcpu); + pcpu_rt = *p; + + if (pcpu_rt) { + dst_hold(&pcpu_rt->dst); + rt6_dst_from_metrics_check(pcpu_rt); + } + return pcpu_rt; +} + +static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt) +{ + struct fib6_table *table = rt->rt6i_table; + struct rt6_info *pcpu_rt, *prev, **p; + + pcpu_rt = ip6_rt_pcpu_alloc(rt); + if (!pcpu_rt) { + struct net *net = dev_net(rt->dst.dev); + + dst_hold(&net->ipv6.ip6_null_entry->dst); + return net->ipv6.ip6_null_entry; + } + + read_lock_bh(&table->tb6_lock); + if (rt->rt6i_pcpu) { + p = this_cpu_ptr(rt->rt6i_pcpu); + prev = cmpxchg(p, NULL, pcpu_rt); + if (prev) { + /* If someone did it before us, return prev instead */ + dst_destroy(&pcpu_rt->dst); + pcpu_rt = prev; + } + } else { + /* rt has been removed from the fib6 tree + * before we have a chance to acquire the read_lock. + * In this case, don't brother to create a pcpu rt + * since rt is going away anyway. The next + * dst_check() will trigger a re-lookup. + */ + dst_destroy(&pcpu_rt->dst); + pcpu_rt = rt; + } + dst_hold(&pcpu_rt->dst); + rt6_dst_from_metrics_check(pcpu_rt); + read_unlock_bh(&table->tb6_lock); + return pcpu_rt; } static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif, struct flowi6 *fl6, int flags) { struct fib6_node *fn, *saved_fn; - struct rt6_info *rt, *nrt; + struct rt6_info *rt; int strict = 0; - int attempts = 3; - int err; strict |= flags & RT6_LOOKUP_F_IFACE; if (net->ipv6.devconf_all->forwarding == 0) strict |= RT6_LOOKUP_F_REACHABLE; -redo_fib6_lookup_lock: read_lock_bh(&table->tb6_lock); fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); @@ -944,51 +1072,65 @@ redo_rt6_select: strict &= ~RT6_LOOKUP_F_REACHABLE; fn = saved_fn; goto redo_rt6_select; - } else { - dst_hold(&rt->dst); - read_unlock_bh(&table->tb6_lock); - goto out2; } } - dst_hold(&rt->dst); - read_unlock_bh(&table->tb6_lock); - if (rt->rt6i_flags & RTF_CACHE) - goto out2; + if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) { + dst_use(&rt->dst, jiffies); + read_unlock_bh(&table->tb6_lock); - if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY))) - nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr); - else if (!(rt->dst.flags & DST_HOST)) - nrt = rt6_alloc_clone(rt, &fl6->daddr); - else - goto out2; + rt6_dst_from_metrics_check(rt); + return rt; + } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) && + !(rt->rt6i_flags & RTF_GATEWAY))) { + /* Create a RTF_CACHE clone which will not be + * owned by the fib6 tree. It is for the special case where + * the daddr in the skb during the neighbor look-up is different + * from the fl6->daddr used to look-up route here. + */ - ip6_rt_put(rt); - rt = nrt ? : net->ipv6.ip6_null_entry; + struct rt6_info *uncached_rt; - dst_hold(&rt->dst); - if (nrt) { - err = ip6_ins_rt(nrt); - if (!err) - goto out2; - } + dst_use(&rt->dst, jiffies); + read_unlock_bh(&table->tb6_lock); - if (--attempts <= 0) - goto out2; + uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL); + dst_release(&rt->dst); - /* - * Race condition! In the gap, when table->tb6_lock was - * released someone could insert this route. Relookup. - */ - ip6_rt_put(rt); - goto redo_fib6_lookup_lock; + if (uncached_rt) + rt6_uncached_list_add(uncached_rt); + else + uncached_rt = net->ipv6.ip6_null_entry; -out2: - rt->dst.lastuse = jiffies; - rt->dst.__use++; + dst_hold(&uncached_rt->dst); + return uncached_rt; - return rt; + } else { + /* Get a percpu copy */ + + struct rt6_info *pcpu_rt; + + rt->dst.lastuse = jiffies; + rt->dst.__use++; + pcpu_rt = rt6_get_pcpu_route(rt); + + if (pcpu_rt) { + read_unlock_bh(&table->tb6_lock); + } else { + /* We have to do the read_unlock first + * because rt6_make_pcpu_route() may trigger + * ip6_dst_gc() which will take the write_lock. + */ + dst_hold(&rt->dst); + read_unlock_bh(&table->tb6_lock); + pcpu_rt = rt6_make_pcpu_route(rt); + dst_release(&rt->dst); + } + + return pcpu_rt; + + } } static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table, @@ -1059,7 +1201,6 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori new = &rt->dst; memset(new + 1, 0, sizeof(*rt) - sizeof(*new)); - rt6_init_peer(rt, net->ipv6.peers); new->__use = 1; new->input = dst_discard; @@ -1093,6 +1234,33 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori * Destination cache support functions */ +static void rt6_dst_from_metrics_check(struct rt6_info *rt) +{ + if (rt->dst.from && + dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from)) + dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true); +} + +static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie) +{ + if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie)) + return NULL; + + if (rt6_check_expired(rt)) + return NULL; + + return &rt->dst; +} + +static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie) +{ + if (rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK && + rt6_check((struct rt6_info *)(rt->dst.from), cookie)) + return &rt->dst; + else + return NULL; +} + static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) { struct rt6_info *rt; @@ -1103,13 +1271,13 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) * DST_OBSOLETE_FORCE_CHK which forces validation calls down * into this function always. */ - if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie)) - return NULL; - if (rt6_check_expired(rt)) - return NULL; + rt6_dst_from_metrics_check(rt); - return dst; + if ((rt->rt6i_flags & RTF_PCPU) || unlikely(dst->flags & DST_NOCACHE)) + return rt6_dst_from_check(rt, cookie); + else + return rt6_check(rt, cookie); } static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) @@ -1148,24 +1316,63 @@ static void ip6_link_failure(struct sk_buff *skb) } } -static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu) +static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu) +{ + struct net *net = dev_net(rt->dst.dev); + + rt->rt6i_flags |= RTF_MODIFIED; + rt->rt6i_pmtu = mtu; + rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires); +} + +static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, + const struct ipv6hdr *iph, u32 mtu) { struct rt6_info *rt6 = (struct rt6_info *)dst; - dst_confirm(dst); - if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) { - struct net *net = dev_net(dst->dev); + if (rt6->rt6i_flags & RTF_LOCAL) + return; - rt6->rt6i_flags |= RTF_MODIFIED; - if (mtu < IPV6_MIN_MTU) - mtu = IPV6_MIN_MTU; + dst_confirm(dst); + mtu = max_t(u32, mtu, IPV6_MIN_MTU); + if (mtu >= dst_mtu(dst)) + return; - dst_metric_set(dst, RTAX_MTU, mtu); - rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires); + if (rt6->rt6i_flags & RTF_CACHE) { + rt6_do_update_pmtu(rt6, mtu); + } else { + const struct in6_addr *daddr, *saddr; + struct rt6_info *nrt6; + + if (iph) { + daddr = &iph->daddr; + saddr = &iph->saddr; + } else if (sk) { + daddr = &sk->sk_v6_daddr; + saddr = &inet6_sk(sk)->saddr; + } else { + return; + } + nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr); + if (nrt6) { + rt6_do_update_pmtu(nrt6, mtu); + + /* ip6_ins_rt(nrt6) will bump the + * rt6->rt6i_node->fn_sernum + * which will fail the next rt6_check() and + * invalidate the sk->sk_dst_cache. + */ + ip6_ins_rt(nrt6); + } } } +static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu) +{ + __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu); +} + void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, int oif, u32 mark) { @@ -1182,7 +1389,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, dst = ip6_route_output(net, NULL, &fl6); if (!dst->error) - ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu)); + __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu)); dst_release(dst); } EXPORT_SYMBOL_GPL(ip6_update_pmtu); @@ -1341,12 +1548,17 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst) static unsigned int ip6_mtu(const struct dst_entry *dst) { + const struct rt6_info *rt = (const struct rt6_info *)dst; + unsigned int mtu = rt->rt6i_pmtu; struct inet6_dev *idev; - unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); if (mtu) goto out; + mtu = dst_metric_raw(dst, RTAX_MTU); + if (mtu) + goto out; + mtu = IPV6_MIN_MTU; rcu_read_lock(); @@ -1373,7 +1585,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, if (unlikely(!idev)) return ERR_PTR(-ENODEV); - rt = ip6_dst_alloc(net, dev, 0, NULL); + rt = ip6_dst_alloc(net, dev, 0); if (unlikely(!rt)) { in6_dev_put(idev); dst = ERR_PTR(-ENOMEM); @@ -1560,7 +1772,8 @@ int ip6_route_add(struct fib6_config *cfg) if (!table) goto out; - rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table); + rt = ip6_dst_alloc(net, NULL, + (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT); if (!rt) { err = -ENOMEM; @@ -1590,10 +1803,8 @@ int ip6_route_add(struct fib6_config *cfg) ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); rt->rt6i_dst.plen = cfg->fc_dst_len; - if (rt->rt6i_dst.plen == 128) { + if (rt->rt6i_dst.plen == 128) rt->dst.flags |= DST_HOST; - dst_metrics_set_force_overwrite(&rt->dst); - } #ifdef CONFIG_IPV6_SUBTREES ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len); @@ -1651,9 +1862,21 @@ int ip6_route_add(struct fib6_config *cfg) int gwa_type; gw_addr = &cfg->fc_gateway; - rt->rt6i_gateway = *gw_addr; gwa_type = ipv6_addr_type(gw_addr); + /* if gw_addr is local we will fail to detect this in case + * address is still TENTATIVE (DAD in progress). rt6_lookup() + * will return already-added prefix route via interface that + * prefix route was assigned to, which might be non-loopback. + */ + err = -EINVAL; + if (ipv6_chk_addr_and_flags(net, gw_addr, + gwa_type & IPV6_ADDR_LINKLOCAL ? + dev : NULL, 0, 0)) + goto out; + + rt->rt6i_gateway = *gw_addr; + if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) { struct rt6_info *grt; @@ -1664,7 +1887,6 @@ int ip6_route_add(struct fib6_config *cfg) (SIT, PtP, NBMA NOARP links) it is handy to allow some exceptions. --ANK */ - err = -EINVAL; if (!(gwa_type & IPV6_ADDR_UNICAST)) goto out; @@ -1785,6 +2007,9 @@ static int ip6_route_del(struct fib6_config *cfg) if (fn) { for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { + if ((rt->rt6i_flags & RTF_CACHE) && + !(cfg->fc_flags & RTF_CACHE)) + continue; if (cfg->fc_ifindex && (!rt->dst.dev || rt->dst.dev->ifindex != cfg->fc_ifindex)) @@ -1894,7 +2119,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu NEIGH_UPDATE_F_ISROUTER)) ); - nrt = ip6_rt_copy(rt, &msg->dest); + nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL); if (!nrt) goto out; @@ -1926,42 +2151,35 @@ out: * Misc support functions */ -static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, - const struct in6_addr *dest) +static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from) { - struct net *net = dev_net(ort->dst.dev); - struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0, - ort->rt6i_table); - - if (rt) { - rt->dst.input = ort->dst.input; - rt->dst.output = ort->dst.output; - rt->dst.flags |= DST_HOST; + BUG_ON(from->dst.from); - rt->rt6i_dst.addr = *dest; - rt->rt6i_dst.plen = 128; - dst_copy_metrics(&rt->dst, &ort->dst); - rt->dst.error = ort->dst.error; - rt->rt6i_idev = ort->rt6i_idev; - if (rt->rt6i_idev) - in6_dev_hold(rt->rt6i_idev); - rt->dst.lastuse = jiffies; - - if (ort->rt6i_flags & RTF_GATEWAY) - rt->rt6i_gateway = ort->rt6i_gateway; - else - rt->rt6i_gateway = *dest; - rt->rt6i_flags = ort->rt6i_flags; - rt6_set_from(rt, ort); - rt->rt6i_metric = 0; + rt->rt6i_flags &= ~RTF_EXPIRES; + dst_hold(&from->dst); + rt->dst.from = &from->dst; + dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true); +} +static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort) +{ + rt->dst.input = ort->dst.input; + rt->dst.output = ort->dst.output; + rt->rt6i_dst = ort->rt6i_dst; + rt->dst.error = ort->dst.error; + rt->rt6i_idev = ort->rt6i_idev; + if (rt->rt6i_idev) + in6_dev_hold(rt->rt6i_idev); + rt->dst.lastuse = jiffies; + rt->rt6i_gateway = ort->rt6i_gateway; + rt->rt6i_flags = ort->rt6i_flags; + rt6_set_from(rt, ort); + rt->rt6i_metric = ort->rt6i_metric; #ifdef CONFIG_IPV6_SUBTREES - memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key)); + rt->rt6i_src = ort->rt6i_src; #endif - memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key)); - rt->rt6i_table = ort->rt6i_table; - } - return rt; + rt->rt6i_prefsrc = ort->rt6i_prefsrc; + rt->rt6i_table = ort->rt6i_table; } #ifdef CONFIG_IPV6_ROUTE_INFO @@ -2212,7 +2430,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, { struct net *net = dev_net(idev->dev); struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, - DST_NOCOUNT, NULL); + DST_NOCOUNT); if (!rt) return ERR_PTR(-ENOMEM); @@ -2336,6 +2554,7 @@ void rt6_ifdown(struct net *net, struct net_device *dev) fib6_clean_all(net, fib6_ifdown, &adn); icmp6_clean_all(fib6_ifdown, &adn); + rt6_uncached_list_flush_dev(net, dev); } struct rt6_mtu_change_arg { @@ -2373,11 +2592,20 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) PMTU discouvery. */ if (rt->dst.dev == arg->dev && - !dst_metric_locked(&rt->dst, RTAX_MTU) && - (dst_mtu(&rt->dst) >= arg->mtu || - (dst_mtu(&rt->dst) < arg->mtu && - dst_mtu(&rt->dst) == idev->cnf.mtu6))) { - dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu); + !dst_metric_locked(&rt->dst, RTAX_MTU)) { + if (rt->rt6i_flags & RTF_CACHE) { + /* For RTF_CACHE with rt6i_pmtu == 0 + * (i.e. a redirected route), + * the metrics of its rt->dst.from has already + * been updated. + */ + if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu) + rt->rt6i_pmtu = arg->mtu; + } else if (dst_mtu(&rt->dst) >= arg->mtu || + (dst_mtu(&rt->dst) < arg->mtu && + dst_mtu(&rt->dst) == idev->cnf.mtu6)) { + dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu); + } } return 0; } @@ -2434,6 +2662,9 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, if (rtm->rtm_type == RTN_LOCAL) cfg->fc_flags |= RTF_LOCAL; + if (rtm->rtm_flags & RTM_F_CLONED) + cfg->fc_flags |= RTF_CACHE; + cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid; cfg->fc_nlinfo.nlh = nlh; cfg->fc_nlinfo.nl_net = sock_net(skb->sk); @@ -2608,6 +2839,7 @@ static int rt6_fill_node(struct net *net, int iif, int type, u32 portid, u32 seq, int prefix, int nowait, unsigned int flags) { + u32 metrics[RTAX_MAX]; struct rtmsg *rtm; struct nlmsghdr *nlh; long expires; @@ -2721,7 +2953,10 @@ static int rt6_fill_node(struct net *net, goto nla_put_failure; } - if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) + memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics)); + if (rt->rt6i_pmtu) + metrics[RTAX_MTU - 1] = rt->rt6i_pmtu; + if (rtnetlink_put_metrics(skb, metrics) < 0) goto nla_put_failure; if (rt->rt6i_flags & RTF_GATEWAY) { @@ -3216,6 +3451,7 @@ static struct notifier_block ip6_route_dev_notifier = { int __init ip6_route_init(void) { int ret; + int cpu; ret = -ENOMEM; ip6_dst_ops_template.kmem_cachep = @@ -3275,6 +3511,13 @@ int __init ip6_route_init(void) if (ret) goto out_register_late_subsys; + for_each_possible_cpu(cpu) { + struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu); + + INIT_LIST_HEAD(&ul->head); + spin_lock_init(&ul->lock); + } + out: return ret; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 21bc2eb53..0909f4e0d 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -41,23 +41,6 @@ static __u16 const msstab[] = { 9000 - 60, }; -static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, - struct request_sock *req, - struct dst_entry *dst) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - struct sock *child; - - child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst); - if (child) { - atomic_set(&req->rsk_refcnt, 1); - inet_csk_reqsk_queue_add(sk, req, child); - } else { - reqsk_free(req); - } - return child; -} - static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS], ipv6_cookie_scratch); @@ -264,7 +247,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq->rcv_wscale = rcv_wscale; ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), dst); - ret = get_cookie_sock(sk, skb, req, dst); + ret = tcp_get_cookie_sock(sk, skb, req, dst); out: return ret; out_free: diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index abcc79f64..4e705add4 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -68,6 +68,13 @@ static struct ctl_table ipv6_table_template[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, + { + .procname = "flowlabel_state_ranges", + .data = &init_net.ipv6.sysctl.flowlabel_state_ranges, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; @@ -109,6 +116,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net) ipv6_table[4].data = &net->ipv6.sysctl.fwmark_reflect; ipv6_table[5].data = &net->ipv6.sysctl.idgen_retries; ipv6_table[6].data = &net->ipv6.sysctl.idgen_delay; + ipv6_table[7].data = &net->ipv6.sysctl.flowlabel_state_ranges; ipv6_route_table = ipv6_route_sysctl_init(net); if (!ipv6_route_table) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5193d0953..a6f28765d 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -100,8 +100,7 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) dst_hold(dst); sk->sk_rx_dst = dst; inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; - if (rt->rt6i_node) - inet6_sk(sk)->rx_dst_cookie = rt->rt6i_node->fn_sernum; + inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt); } } @@ -122,7 +121,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, struct ipv6_pinfo *np = inet6_sk(sk); struct tcp_sock *tp = tcp_sk(sk); struct in6_addr *saddr = NULL, *final_p, final; - struct rt6_info *rt; struct flowi6 fl6; struct dst_entry *dst; int addr_type; @@ -260,10 +258,9 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, sk->sk_gso_type = SKB_GSO_TCPV6; __ip6_dst_store(sk, dst, NULL, NULL); - rt = (struct rt6_info *) dst; if (tcp_death_row.sysctl_tw_recycle && !tp->rx_opt.ts_recent_stamp && - ipv6_addr_equal(&rt->rt6i_dst.addr, &sk->sk_v6_daddr)) + ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr)) tcp_fetch_timewait_stamp(sk, dst); icsk->icsk_ext_hdr_len = 0; @@ -962,7 +959,7 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb) &ipv6_hdr(skb)->daddr, tcp_v6_iif(skb)); if (req) { nsk = tcp_check_req(sk, skb, req, false); - if (!nsk) + if (!nsk || nsk == sk) reqsk_put(req); return nsk; } @@ -1268,7 +1265,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) return 0; } - if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb)) + if (tcp_checksum_complete(skb)) goto csum_err; #ifdef CONFIG_TCP_STEALTH @@ -1445,6 +1442,7 @@ process: skb->dev = NULL; bh_lock_sock_nested(sk); + tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs); ret = 0; if (!sock_owned_by_user(sk)) { if (!tcp_prequeue(sk, skb)) @@ -1466,7 +1464,7 @@ no_tcp_socket: tcp_v6_fill_cb(skb, hdr, th); - if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) { + if (tcp_checksum_complete(skb)) { csum_error: TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS); bad_packet: @@ -1491,10 +1489,6 @@ do_time_wait: tcp_v6_fill_cb(skb, hdr, th); - if (skb->len < (th->doff<<2)) { - inet_twsk_put(inet_twsk(sk)); - goto bad_packet; - } if (tcp_checksum_complete(skb)) { inet_twsk_put(inet_twsk(sk)); goto csum_error; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index f337a908a..ed0583c1b 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -71,20 +71,12 @@ static int xfrm6_get_tos(const struct flowi *fl) return 0; } -static void xfrm6_init_dst(struct net *net, struct xfrm_dst *xdst) -{ - struct rt6_info *rt = (struct rt6_info *)xdst; - - rt6_init_peer(rt, net->ipv6.peers); -} - static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst, int nfheader_len) { if (dst->ops->family == AF_INET6) { struct rt6_info *rt = (struct rt6_info *)dst; - if (rt->rt6i_node) - path->path_cookie = rt->rt6i_node->fn_sernum; + path->path_cookie = rt6_get_cookie(rt); } path->u.rt6.rt6i_nfheader_len = nfheader_len; @@ -106,16 +98,13 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, return -ENODEV; } - rt6_transfer_peer(&xdst->u.rt6, rt); - /* Sheit... I remember I did this right. Apparently, * it was magically lost, so this code needs audit */ xdst->u.rt6.rt6i_flags = rt->rt6i_flags & (RTF_ANYCAST | RTF_LOCAL); xdst->u.rt6.rt6i_metric = rt->rt6i_metric; xdst->u.rt6.rt6i_node = rt->rt6i_node; - if (rt->rt6i_node) - xdst->route_cookie = rt->rt6i_node->fn_sernum; + xdst->route_cookie = rt6_get_cookie(rt); xdst->u.rt6.rt6i_gateway = rt->rt6i_gateway; xdst->u.rt6.rt6i_dst = rt->rt6i_dst; xdst->u.rt6.rt6i_src = rt->rt6i_src; @@ -255,10 +244,6 @@ static void xfrm6_dst_destroy(struct dst_entry *dst) if (likely(xdst->u.rt6.rt6i_idev)) in6_dev_put(xdst->u.rt6.rt6i_idev); dst_destroy_metrics_generic(dst); - if (rt6_has_peer(&xdst->u.rt6)) { - struct inet_peer *peer = rt6_peer_ptr(&xdst->u.rt6); - inet_putpeer(peer); - } xfrm_dst_destroy(xdst); } @@ -308,7 +293,6 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { .get_saddr = xfrm6_get_saddr, .decode_session = _decode_session6, .get_tos = xfrm6_get_tos, - .init_dst = xfrm6_init_dst, .init_path = xfrm6_init_path, .fill_dst = xfrm6_fill_dst, .blackhole_route = ip6_blackhole_route, |