diff options
Diffstat (limited to 'net/ipv6')
35 files changed, 808 insertions, 557 deletions
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 40c897515..11e875ffd 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -207,6 +207,7 @@ config IPV6_NDISC_NODETYPE config IPV6_TUNNEL tristate "IPv6: IP-in-IPv6 tunnel (RFC2473)" select INET6_TUNNEL + select DST_CACHE ---help--- Support for IPv6-in-IPv6 and IPv4-in-IPv6 tunnels described in RFC 2473. diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index bdd7eac43..8ec4b3089 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -216,6 +216,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { }, .use_oif_addrs_only = 0, .ignore_routes_with_linkdown = 0, + .keep_addr_on_down = 0, }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -260,6 +261,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { }, .use_oif_addrs_only = 0, .ignore_routes_with_linkdown = 0, + .keep_addr_on_down = 0, }; /* Check if a valid qdisc is available */ @@ -471,18 +473,21 @@ static int inet6_netconf_msgsize_devconf(int type) { int size = NLMSG_ALIGN(sizeof(struct netconfmsg)) + nla_total_size(4); /* NETCONFA_IFINDEX */ + bool all = false; - /* type -1 is used for ALL */ - if (type == -1 || type == NETCONFA_FORWARDING) + if (type == NETCONFA_ALL) + all = true; + + if (all || type == NETCONFA_FORWARDING) size += nla_total_size(4); #ifdef CONFIG_IPV6_MROUTE - if (type == -1 || type == NETCONFA_MC_FORWARDING) + if (all || type == NETCONFA_MC_FORWARDING) size += nla_total_size(4); #endif - if (type == -1 || type == NETCONFA_PROXY_NEIGH) + if (all || type == NETCONFA_PROXY_NEIGH) size += nla_total_size(4); - if (type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) + if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) size += nla_total_size(4); return size; @@ -495,33 +500,36 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex, { struct nlmsghdr *nlh; struct netconfmsg *ncm; + bool all = false; nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg), flags); if (!nlh) return -EMSGSIZE; + if (type == NETCONFA_ALL) + all = true; + ncm = nlmsg_data(nlh); ncm->ncm_family = AF_INET6; if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0) goto nla_put_failure; - /* type -1 is used for ALL */ - if ((type == -1 || type == NETCONFA_FORWARDING) && + if ((all || type == NETCONFA_FORWARDING) && nla_put_s32(skb, NETCONFA_FORWARDING, devconf->forwarding) < 0) goto nla_put_failure; #ifdef CONFIG_IPV6_MROUTE - if ((type == -1 || type == NETCONFA_MC_FORWARDING) && + if ((all || type == NETCONFA_MC_FORWARDING) && nla_put_s32(skb, NETCONFA_MC_FORWARDING, devconf->mc_forwarding) < 0) goto nla_put_failure; #endif - if ((type == -1 || type == NETCONFA_PROXY_NEIGH) && + if ((all || type == NETCONFA_PROXY_NEIGH) && nla_put_s32(skb, NETCONFA_PROXY_NEIGH, devconf->proxy_ndp) < 0) goto nla_put_failure; - if ((type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) && + if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) && nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, devconf->ignore_routes_with_linkdown) < 0) goto nla_put_failure; @@ -607,14 +615,14 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb, } err = -ENOBUFS; - skb = nlmsg_new(inet6_netconf_msgsize_devconf(-1), GFP_ATOMIC); + skb = nlmsg_new(inet6_netconf_msgsize_devconf(NETCONFA_ALL), GFP_ATOMIC); if (!skb) goto errout; err = inet6_netconf_fill_devconf(skb, ifindex, devconf, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, RTM_NEWNETCONF, 0, - -1); + NETCONFA_ALL); if (err < 0) { /* -EMSGSIZE implies BUG in inet6_netconf_msgsize_devconf() */ WARN_ON(err == -EMSGSIZE); @@ -658,7 +666,7 @@ static int inet6_netconf_dump_devconf(struct sk_buff *skb, cb->nlh->nlmsg_seq, RTM_NEWNETCONF, NLM_F_MULTI, - -1) < 0) { + NETCONFA_ALL) < 0) { rcu_read_unlock(); goto done; } @@ -674,7 +682,7 @@ cont: NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNETCONF, NLM_F_MULTI, - -1) < 0) + NETCONFA_ALL) < 0) goto done; else h++; @@ -685,7 +693,7 @@ cont: NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNETCONF, NLM_F_MULTI, - -1) < 0) + NETCONFA_ALL) < 0) goto done; else h++; @@ -3168,10 +3176,60 @@ static void addrconf_gre_config(struct net_device *dev) } #endif +static int fixup_permanent_addr(struct inet6_dev *idev, + struct inet6_ifaddr *ifp) +{ + if (!ifp->rt) { + struct rt6_info *rt; + + rt = addrconf_dst_alloc(idev, &ifp->addr, false); + if (unlikely(IS_ERR(rt))) + return PTR_ERR(rt); + + ifp->rt = rt; + } + + if (!(ifp->flags & IFA_F_NOPREFIXROUTE)) { + addrconf_prefix_route(&ifp->addr, ifp->prefix_len, + idev->dev, 0, 0); + } + + addrconf_dad_start(ifp); + + return 0; +} + +static void addrconf_permanent_addr(struct net_device *dev) +{ + struct inet6_ifaddr *ifp, *tmp; + struct inet6_dev *idev; + + idev = __in6_dev_get(dev); + if (!idev) + return; + + write_lock_bh(&idev->lock); + + list_for_each_entry_safe(ifp, tmp, &idev->addr_list, if_list) { + if ((ifp->flags & IFA_F_PERMANENT) && + fixup_permanent_addr(idev, ifp) < 0) { + write_unlock_bh(&idev->lock); + ipv6_del_addr(ifp); + write_lock_bh(&idev->lock); + + net_info_ratelimited("%s: Failed to add prefix route for address %pI6c; dropping\n", + idev->dev->name, &ifp->addr); + } + } + + write_unlock_bh(&idev->lock); +} + static int addrconf_notify(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct netdev_notifier_changeupper_info *info; struct inet6_dev *idev = __in6_dev_get(dev); int run_pending = 0; int err; @@ -3220,6 +3278,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, break; if (event == NETDEV_UP) { + /* restore routes for permanent addresses */ + addrconf_permanent_addr(dev); + if (!addrconf_qdisc_ok(dev)) { /* device is not ready yet. */ pr_info("ADDRCONF(NETDEV_UP): %s: link is not ready\n", @@ -3327,6 +3388,15 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, if (idev) addrconf_type_change(dev, event); break; + + case NETDEV_CHANGEUPPER: + info = ptr; + + /* flush all routes if dev is linked to or unlinked from + * an L3 master device (e.g., VRF) + */ + if (info->upper_dev && netif_is_l3_master(info->upper_dev)) + addrconf_ifdown(dev, 0); } return NOTIFY_OK; @@ -3352,11 +3422,20 @@ static void addrconf_type_change(struct net_device *dev, unsigned long event) ipv6_mc_unmap(idev); } +static bool addr_is_local(const struct in6_addr *addr) +{ + return ipv6_addr_type(addr) & + (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK); +} + static int addrconf_ifdown(struct net_device *dev, int how) { struct net *net = dev_net(dev); struct inet6_dev *idev; - struct inet6_ifaddr *ifa; + struct inet6_ifaddr *ifa, *tmp; + struct list_head del_list; + int _keep_addr; + bool keep_addr; int state, i; ASSERT_RTNL(); @@ -3383,6 +3462,16 @@ static int addrconf_ifdown(struct net_device *dev, int how) } + /* aggregate the system setting and interface setting */ + _keep_addr = net->ipv6.devconf_all->keep_addr_on_down; + if (!_keep_addr) + _keep_addr = idev->cnf.keep_addr_on_down; + + /* combine the user config with event to determine if permanent + * addresses are to be removed from address hash table + */ + keep_addr = !(how || _keep_addr <= 0); + /* Step 2: clear hash table */ for (i = 0; i < IN6_ADDR_HSIZE; i++) { struct hlist_head *h = &inet6_addr_lst[i]; @@ -3391,9 +3480,16 @@ static int addrconf_ifdown(struct net_device *dev, int how) restart: hlist_for_each_entry_rcu(ifa, h, addr_lst) { if (ifa->idev == idev) { - hlist_del_init_rcu(&ifa->addr_lst); addrconf_del_dad_work(ifa); - goto restart; + /* combined flag + permanent flag decide if + * address is retained on a down event + */ + if (!keep_addr || + !(ifa->flags & IFA_F_PERMANENT) || + addr_is_local(&ifa->addr)) { + hlist_del_init_rcu(&ifa->addr_lst); + goto restart; + } } } spin_unlock_bh(&addrconf_hash_lock); @@ -3427,31 +3523,62 @@ restart: write_lock_bh(&idev->lock); } - while (!list_empty(&idev->addr_list)) { - ifa = list_first_entry(&idev->addr_list, - struct inet6_ifaddr, if_list); - addrconf_del_dad_work(ifa); + /* re-combine the user config with event to determine if permanent + * addresses are to be removed from the interface list + */ + keep_addr = (!how && _keep_addr > 0); - list_del(&ifa->if_list); + INIT_LIST_HEAD(&del_list); + list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) { + struct rt6_info *rt = NULL; - write_unlock_bh(&idev->lock); + addrconf_del_dad_work(ifa); + write_unlock_bh(&idev->lock); spin_lock_bh(&ifa->lock); - state = ifa->state; - ifa->state = INET6_IFADDR_STATE_DEAD; + + if (keep_addr && (ifa->flags & IFA_F_PERMANENT) && + !addr_is_local(&ifa->addr)) { + /* set state to skip the notifier below */ + state = INET6_IFADDR_STATE_DEAD; + ifa->state = 0; + if (!(ifa->flags & IFA_F_NODAD)) + ifa->flags |= IFA_F_TENTATIVE; + + rt = ifa->rt; + ifa->rt = NULL; + } else { + state = ifa->state; + ifa->state = INET6_IFADDR_STATE_DEAD; + + list_del(&ifa->if_list); + list_add(&ifa->if_list, &del_list); + } + spin_unlock_bh(&ifa->lock); + if (rt) + ip6_del_rt(rt); + if (state != INET6_IFADDR_STATE_DEAD) { __ipv6_ifa_notify(RTM_DELADDR, ifa); inet6addr_notifier_call_chain(NETDEV_DOWN, ifa); } - in6_ifa_put(ifa); write_lock_bh(&idev->lock); } write_unlock_bh(&idev->lock); + /* now clean up addresses to be removed */ + while (!list_empty(&del_list)) { + ifa = list_first_entry(&del_list, + struct inet6_ifaddr, if_list); + list_del(&ifa->if_list); + + in6_ifa_put(ifa); + } + /* Step 5: Discard anycast and multicast list */ if (how) { ipv6_ac_destroy_dev(idev); @@ -4714,6 +4841,9 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN] = cnf->ignore_routes_with_linkdown; /* we omit DEVCONF_STABLE_SECRET for now */ array[DEVCONF_USE_OIF_ADDRS_ONLY] = cnf->use_oif_addrs_only; + array[DEVCONF_DROP_UNICAST_IN_L2_MULTICAST] = cnf->drop_unicast_in_l2_multicast; + array[DEVCONF_DROP_UNSOLICITED_NA] = cnf->drop_unsolicited_na; + array[DEVCONF_KEEP_ADDR_ON_DOWN] = cnf->keep_addr_on_down; } static inline size_t inet6_ifla6_size(void) @@ -5195,10 +5325,10 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) if (rt) ip6_del_rt(rt); } - dst_hold(&ifp->rt->dst); - - ip6_del_rt(ifp->rt); - + if (ifp->rt) { + dst_hold(&ifp->rt->dst); + ip6_del_rt(ifp->rt); + } rt_genid_bump_ipv6(net); break; } @@ -5788,6 +5918,28 @@ static struct addrconf_sysctl_table .proc_handler = addrconf_sysctl_ignore_routes_with_linkdown, }, { + .procname = "drop_unicast_in_l2_multicast", + .data = &ipv6_devconf.drop_unicast_in_l2_multicast, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "drop_unsolicited_na", + .data = &ipv6_devconf.drop_unsolicited_na, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "keep_addr_on_down", + .data = &ipv6_devconf.keep_addr_on_down, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + + }, + { /* sentinel */ } }, diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 9f5137cd6..b11c37cfd 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -235,7 +235,11 @@ lookup_protocol: * creation time automatically shares. */ inet->inet_sport = htons(inet->inet_num); - sk->sk_prot->hash(sk); + err = sk->sk_prot->hash(sk); + if (err) { + sk_common_release(sk); + goto out; + } } if (sk->sk_prot->init) { err = sk->sk_prot->init(sk); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 428162155..9dd3882fe 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -40,18 +40,114 @@ static bool ipv6_mapped_addr_any(const struct in6_addr *a) return ipv6_addr_v4mapped(a) && (a->s6_addr32[3] == 0); } +static void ip6_datagram_flow_key_init(struct flowi6 *fl6, struct sock *sk) +{ + struct inet_sock *inet = inet_sk(sk); + struct ipv6_pinfo *np = inet6_sk(sk); + + memset(fl6, 0, sizeof(*fl6)); + fl6->flowi6_proto = sk->sk_protocol; + fl6->daddr = sk->sk_v6_daddr; + fl6->saddr = np->saddr; + fl6->flowi6_oif = sk->sk_bound_dev_if; + fl6->flowi6_mark = sk->sk_mark; + fl6->fl6_dport = inet->inet_dport; + fl6->fl6_sport = inet->inet_sport; + fl6->flowlabel = np->flow_label; + + if (!fl6->flowi6_oif) + fl6->flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; + + if (!fl6->flowi6_oif && ipv6_addr_is_multicast(&fl6->daddr)) + fl6->flowi6_oif = np->mcast_oif; + + security_sk_classify_flow(sk, flowi6_to_flowi(fl6)); +} + +int ip6_datagram_dst_update(struct sock *sk, bool fix_sk_saddr) +{ + struct ip6_flowlabel *flowlabel = NULL; + struct in6_addr *final_p, final; + struct ipv6_txoptions *opt; + struct dst_entry *dst; + struct inet_sock *inet = inet_sk(sk); + struct ipv6_pinfo *np = inet6_sk(sk); + struct flowi6 fl6; + int err = 0; + + if (np->sndflow && (np->flow_label & IPV6_FLOWLABEL_MASK)) { + flowlabel = fl6_sock_lookup(sk, np->flow_label); + if (!flowlabel) + return -EINVAL; + } + ip6_datagram_flow_key_init(&fl6, sk); + + rcu_read_lock(); + opt = flowlabel ? flowlabel->opt : rcu_dereference(np->opt); + final_p = fl6_update_dst(&fl6, opt, &final); + rcu_read_unlock(); + + dst = ip6_dst_lookup_flow(sk, &fl6, final_p); + if (IS_ERR(dst)) { + err = PTR_ERR(dst); + goto out; + } + + if (fix_sk_saddr) { + if (ipv6_addr_any(&np->saddr)) + np->saddr = fl6.saddr; + + if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { + sk->sk_v6_rcv_saddr = fl6.saddr; + inet->inet_rcv_saddr = LOOPBACK4_IPV6; + if (sk->sk_prot->rehash) + sk->sk_prot->rehash(sk); + } + } + + ip6_dst_store(sk, dst, + ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr) ? + &sk->sk_v6_daddr : NULL, +#ifdef CONFIG_IPV6_SUBTREES + ipv6_addr_equal(&fl6.saddr, &np->saddr) ? + &np->saddr : +#endif + NULL); + +out: + fl6_sock_release(flowlabel); + return err; +} + +void ip6_datagram_release_cb(struct sock *sk) +{ + struct dst_entry *dst; + + if (ipv6_addr_v4mapped(&sk->sk_v6_daddr)) + return; + + rcu_read_lock(); + dst = __sk_dst_get(sk); + if (!dst || !dst->obsolete || + dst->ops->check(dst, inet6_sk(sk)->dst_cookie)) { + rcu_read_unlock(); + return; + } + rcu_read_unlock(); + + ip6_datagram_dst_update(sk, false); +} +EXPORT_SYMBOL_GPL(ip6_datagram_release_cb); + static int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); - struct in6_addr *daddr, *final_p, final; - struct dst_entry *dst; - struct flowi6 fl6; - struct ip6_flowlabel *flowlabel = NULL; - struct ipv6_txoptions *opt; + struct in6_addr *daddr; int addr_type; int err; + __be32 fl6_flowlabel = 0; if (usin->sin6_family == AF_INET) { if (__ipv6_only_sock(sk)) @@ -66,15 +162,8 @@ static int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int a if (usin->sin6_family != AF_INET6) return -EAFNOSUPPORT; - memset(&fl6, 0, sizeof(fl6)); - if (np->sndflow) { - fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK; - if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { - flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); - if (!flowlabel) - return -EINVAL; - } - } + if (np->sndflow) + fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK; addr_type = ipv6_addr_type(&usin->sin6_addr); @@ -145,7 +234,7 @@ ipv4_connected: } sk->sk_v6_daddr = *daddr; - np->flow_label = fl6.flowlabel; + np->flow_label = fl6_flowlabel; inet->inet_dport = usin->sin6_port; @@ -154,59 +243,13 @@ ipv4_connected: * destination cache for it. */ - fl6.flowi6_proto = sk->sk_protocol; - fl6.daddr = sk->sk_v6_daddr; - fl6.saddr = np->saddr; - fl6.flowi6_oif = sk->sk_bound_dev_if; - fl6.flowi6_mark = sk->sk_mark; - fl6.fl6_dport = inet->inet_dport; - fl6.fl6_sport = inet->inet_sport; - - if (!fl6.flowi6_oif) - fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; - - if (!fl6.flowi6_oif && (addr_type&IPV6_ADDR_MULTICAST)) - fl6.flowi6_oif = np->mcast_oif; - - security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - - rcu_read_lock(); - opt = flowlabel ? flowlabel->opt : rcu_dereference(np->opt); - final_p = fl6_update_dst(&fl6, opt, &final); - rcu_read_unlock(); - - dst = ip6_dst_lookup_flow(sk, &fl6, final_p); - err = 0; - if (IS_ERR(dst)) { - err = PTR_ERR(dst); + err = ip6_datagram_dst_update(sk, true); + if (err) goto out; - } - - /* source address lookup done in ip6_dst_lookup */ - - if (ipv6_addr_any(&np->saddr)) - np->saddr = fl6.saddr; - - if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { - sk->sk_v6_rcv_saddr = fl6.saddr; - inet->inet_rcv_saddr = LOOPBACK4_IPV6; - if (sk->sk_prot->rehash) - sk->sk_prot->rehash(sk); - } - - ip6_dst_store(sk, dst, - ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr) ? - &sk->sk_v6_daddr : NULL, -#ifdef CONFIG_IPV6_SUBTREES - ipv6_addr_equal(&fl6.saddr, &np->saddr) ? - &np->saddr : -#endif - NULL); sk->sk_state = TCP_ESTABLISHED; sk_set_txhash(sk); out: - fl6_sock_release(flowlabel); return err; } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 0a37ddc7a..0013cacf7 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -445,6 +445,8 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) if (__ipv6_addr_needs_scope_id(addr_type)) iif = skb->dev->ifindex; + else + iif = l3mdev_master_ifindex(skb->dev); /* * Must not send error if the source does not uniquely @@ -499,9 +501,6 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) else if (!fl6.flowi6_oif) fl6.flowi6_oif = np->ucast_oif; - if (!fl6.flowi6_oif) - fl6.flowi6_oif = l3mdev_master_ifindex(skb->dev); - dst = icmpv6_route_lookup(net, skb, sk, &fl6); if (IS_ERR(dst)) goto out; diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c index 32dc9aab7..30613050e 100644 --- a/net/ipv6/ila/ila_common.c +++ b/net/ipv6/ila/ila_common.c @@ -99,5 +99,6 @@ static void __exit ila_fini(void) module_init(ila_init); module_exit(ila_fini); +MODULE_ALIAS_RTNL_LWT(ILA); MODULE_AUTHOR("Tom Herbert <tom@herbertland.com>"); MODULE_LICENSE("GPL"); diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c index 2ae3c4fd8..41f18de5d 100644 --- a/net/ipv6/ila/ila_lwt.c +++ b/net/ipv6/ila/ila_lwt.c @@ -120,8 +120,7 @@ nla_put_failure: static int ila_encap_nlsize(struct lwtunnel_state *lwtstate) { - /* No encapsulation overhead */ - return 0; + return nla_total_size(sizeof(u64)); /* ILA_ATTR_LOCATOR */ } static int ila_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 36c3f0155..532c3ef28 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -26,6 +26,7 @@ #include <net/ip6_route.h> #include <net/sock.h> #include <net/inet6_connection_sock.h> +#include <net/sock_reuseport.h> int inet6_csk_bind_conflict(const struct sock *sk, const struct inet_bind_bucket *tb, bool relax) @@ -48,6 +49,7 @@ int inet6_csk_bind_conflict(const struct sock *sk, if ((!reuse || !sk2->sk_reuse || sk2->sk_state == TCP_LISTEN) && (!reuseport || !sk2->sk_reuseport || + rcu_access_pointer(sk->sk_reuseport_cb) || (sk2->sk_state != TCP_TIME_WAIT && !uid_eq(uid, sock_i_uid((struct sock *)sk2))))) { diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 21ace5a2b..70f2628be 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -17,11 +17,13 @@ #include <linux/module.h> #include <linux/random.h> +#include <net/addrconf.h> #include <net/inet_connection_sock.h> #include <net/inet_hashtables.h> #include <net/inet6_hashtables.h> #include <net/secure_seq.h> #include <net/ip.h> +#include <net/sock_reuseport.h> u32 inet6_ehashfn(const struct net *net, const struct in6_addr *laddr, const u16 lport, @@ -121,7 +123,9 @@ static inline int compute_score(struct sock *sk, struct net *net, } struct sock *inet6_lookup_listener(struct net *net, - struct inet_hashinfo *hashinfo, const struct in6_addr *saddr, + struct inet_hashinfo *hashinfo, + struct sk_buff *skb, int doff, + const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const unsigned short hnum, const int dif) { @@ -129,6 +133,7 @@ struct sock *inet6_lookup_listener(struct net *net, const struct hlist_nulls_node *node; struct sock *result; int score, hiscore, matches = 0, reuseport = 0; + bool select_ok = true; u32 phash = 0; unsigned int hash = inet_lhashfn(net, hnum); struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; @@ -146,6 +151,15 @@ begin: if (reuseport) { phash = inet6_ehashfn(net, daddr, hnum, saddr, sport); + if (select_ok) { + struct sock *sk2; + sk2 = reuseport_select_sock(sk, phash, + skb, doff); + if (sk2) { + result = sk2; + goto found; + } + } matches = 1; } } else if (score == hiscore && reuseport) { @@ -163,11 +177,13 @@ begin: if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE) goto begin; if (result) { +found: if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) result = NULL; else if (unlikely(compute_score(result, net, hnum, daddr, dif) < hiscore)) { sock_put(result); + select_ok = false; goto begin; } } @@ -177,6 +193,7 @@ begin: EXPORT_SYMBOL_GPL(inet6_lookup_listener); struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, + struct sk_buff *skb, int doff, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const __be16 dport, const int dif) @@ -184,7 +201,8 @@ struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, struct sock *sk; local_bh_disable(); - sk = __inet6_lookup(net, hashinfo, saddr, sport, daddr, ntohs(dport), dif); + sk = __inet6_lookup(net, hashinfo, skb, doff, saddr, sport, daddr, + ntohs(dport), dif); local_bh_enable(); return sk; @@ -274,3 +292,59 @@ int inet6_hash_connect(struct inet_timewait_death_row *death_row, __inet6_check_established); } EXPORT_SYMBOL_GPL(inet6_hash_connect); + +int inet6_hash(struct sock *sk) +{ + if (sk->sk_state != TCP_CLOSE) { + local_bh_disable(); + __inet_hash(sk, NULL, ipv6_rcv_saddr_equal); + local_bh_enable(); + } + + return 0; +} +EXPORT_SYMBOL_GPL(inet6_hash); + +/* match_wildcard == true: IPV6_ADDR_ANY equals to any IPv6 addresses if IPv6 + * only, and any IPv4 addresses if not IPv6 only + * match_wildcard == false: addresses must be exactly the same, i.e. + * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY, + * and 0.0.0.0 equals to 0.0.0.0 only + */ +int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, + bool match_wildcard) +{ + const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); + int sk2_ipv6only = inet_v6_ipv6only(sk2); + int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr); + int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED; + + /* if both are mapped, treat as IPv4 */ + if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) { + if (!sk2_ipv6only) { + if (sk->sk_rcv_saddr == sk2->sk_rcv_saddr) + return 1; + if (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr) + return match_wildcard; + } + return 0; + } + + if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY) + return 1; + + if (addr_type2 == IPV6_ADDR_ANY && match_wildcard && + !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED)) + return 1; + + if (addr_type == IPV6_ADDR_ANY && match_wildcard && + !(ipv6_only_sock(sk) && addr_type2 == IPV6_ADDR_MAPPED)) + return 1; + + if (sk2_rcv_saddr6 && + ipv6_addr_equal(&sk->sk_v6_rcv_saddr, sk2_rcv_saddr6)) + return 1; + + return 0; +} +EXPORT_SYMBOL_GPL(ipv6_rcv_saddr_equal); diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c index 9a4d7322f..b2025bf3d 100644 --- a/net/ipv6/ip6_checksum.c +++ b/net/ipv6/ip6_checksum.c @@ -6,8 +6,7 @@ #ifndef _HAVE_ARCH_IPV6_CSUM __sum16 csum_ipv6_magic(const struct in6_addr *saddr, const struct in6_addr *daddr, - __u32 len, unsigned short proto, - __wsum csum) + __u32 len, __u8 proto, __wsum csum) { int carry; @@ -98,27 +97,16 @@ void udp6_set_csum(bool nocheck, struct sk_buff *skb, uh->check = 0; else if (skb_is_gso(skb)) uh->check = ~udp_v6_check(len, saddr, daddr, 0); - else if (skb_dst(skb) && skb_dst(skb)->dev && - (skb_dst(skb)->dev->features & NETIF_F_IPV6_CSUM)) { - - BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL); - + else if (skb->ip_summed == CHECKSUM_PARTIAL) { + uh->check = 0; + uh->check = udp_v6_check(len, saddr, daddr, lco_csum(skb)); + if (uh->check == 0) + uh->check = CSUM_MANGLED_0; + } else { skb->ip_summed = CHECKSUM_PARTIAL; skb->csum_start = skb_transport_header(skb) - skb->head; skb->csum_offset = offsetof(struct udphdr, check); uh->check = ~udp_v6_check(len, saddr, daddr, 0); - } else { - __wsum csum; - - BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL); - - uh->check = 0; - csum = skb_checksum(skb, 0, len, 0); - uh->check = udp_v6_check(len, saddr, daddr, csum); - if (uh->check == 0) - uh->check = CSUM_MANGLED_0; - - skb->ip_summed = CHECKSUM_UNNECESSARY; } } EXPORT_SYMBOL(udp6_set_csum); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 0c7e276c2..ea071fad6 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -55,8 +55,6 @@ struct fib6_cleaner { void *arg; }; -static DEFINE_RWLOCK(fib6_walker_lock); - #ifdef CONFIG_IPV6_SUBTREES #define FWS_INIT FWS_S #else @@ -66,7 +64,7 @@ static DEFINE_RWLOCK(fib6_walker_lock); static void fib6_prune_clones(struct net *net, struct fib6_node *fn); static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn); static struct fib6_node *fib6_repair_tree(struct net *net, struct fib6_node *fn); -static int fib6_walk(struct fib6_walker *w); +static int fib6_walk(struct net *net, struct fib6_walker *w); static int fib6_walk_continue(struct fib6_walker *w); /* @@ -78,21 +76,21 @@ static int fib6_walk_continue(struct fib6_walker *w); static void fib6_gc_timer_cb(unsigned long arg); -static LIST_HEAD(fib6_walkers); -#define FOR_WALKERS(w) list_for_each_entry(w, &fib6_walkers, lh) +#define FOR_WALKERS(net, w) \ + list_for_each_entry(w, &(net)->ipv6.fib6_walkers, lh) -static void fib6_walker_link(struct fib6_walker *w) +static void fib6_walker_link(struct net *net, struct fib6_walker *w) { - write_lock_bh(&fib6_walker_lock); - list_add(&w->lh, &fib6_walkers); - write_unlock_bh(&fib6_walker_lock); + write_lock_bh(&net->ipv6.fib6_walker_lock); + list_add(&w->lh, &net->ipv6.fib6_walkers); + write_unlock_bh(&net->ipv6.fib6_walker_lock); } -static void fib6_walker_unlink(struct fib6_walker *w) +static void fib6_walker_unlink(struct net *net, struct fib6_walker *w) { - write_lock_bh(&fib6_walker_lock); + write_lock_bh(&net->ipv6.fib6_walker_lock); list_del(&w->lh); - write_unlock_bh(&fib6_walker_lock); + write_unlock_bh(&net->ipv6.fib6_walker_lock); } static int fib6_new_sernum(struct net *net) @@ -325,12 +323,13 @@ static int fib6_dump_node(struct fib6_walker *w) static void fib6_dump_end(struct netlink_callback *cb) { + struct net *net = sock_net(cb->skb->sk); struct fib6_walker *w = (void *)cb->args[2]; if (w) { if (cb->args[4]) { cb->args[4] = 0; - fib6_walker_unlink(w); + fib6_walker_unlink(net, w); } cb->args[2] = 0; kfree(w); @@ -348,6 +347,7 @@ static int fib6_dump_done(struct netlink_callback *cb) static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = sock_net(skb->sk); struct fib6_walker *w; int res; @@ -359,7 +359,7 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb, w->skip = 0; read_lock_bh(&table->tb6_lock); - res = fib6_walk(w); + res = fib6_walk(net, w); read_unlock_bh(&table->tb6_lock); if (res > 0) { cb->args[4] = 1; @@ -379,7 +379,7 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb, res = fib6_walk_continue(w); read_unlock_bh(&table->tb6_lock); if (res <= 0) { - fib6_walker_unlink(w); + fib6_walker_unlink(net, w); cb->args[4] = 0; } } @@ -1340,8 +1340,8 @@ static struct fib6_node *fib6_repair_tree(struct net *net, } #endif - read_lock(&fib6_walker_lock); - FOR_WALKERS(w) { + read_lock(&net->ipv6.fib6_walker_lock); + FOR_WALKERS(net, w) { if (!child) { if (w->root == fn) { w->root = w->node = NULL; @@ -1368,7 +1368,7 @@ static struct fib6_node *fib6_repair_tree(struct net *net, } } } - read_unlock(&fib6_walker_lock); + read_unlock(&net->ipv6.fib6_walker_lock); node_free(fn); if (pn->fn_flags & RTN_RTINFO || FIB6_SUBTREE(pn)) @@ -1411,8 +1411,8 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, } /* Adjust walkers */ - read_lock(&fib6_walker_lock); - FOR_WALKERS(w) { + read_lock(&net->ipv6.fib6_walker_lock); + FOR_WALKERS(net, w) { if (w->state == FWS_C && w->leaf == rt) { RT6_TRACE("walker %p adjusted by delroute\n", w); w->leaf = rt->dst.rt6_next; @@ -1420,7 +1420,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, w->state = FWS_U; } } - read_unlock(&fib6_walker_lock); + read_unlock(&net->ipv6.fib6_walker_lock); rt->dst.rt6_next = NULL; @@ -1588,17 +1588,17 @@ skip: } } -static int fib6_walk(struct fib6_walker *w) +static int fib6_walk(struct net *net, struct fib6_walker *w) { int res; w->state = FWS_INIT; w->node = w->root; - fib6_walker_link(w); + fib6_walker_link(net, w); res = fib6_walk_continue(w); if (res <= 0) - fib6_walker_unlink(w); + fib6_walker_unlink(net, w); return res; } @@ -1668,7 +1668,7 @@ static void fib6_clean_tree(struct net *net, struct fib6_node *root, c.arg = arg; c.net = net; - fib6_walk(&c.w); + fib6_walk(net, &c.w); } static void __fib6_clean_all(struct net *net, @@ -1725,14 +1725,15 @@ static void fib6_flush_trees(struct net *net) * Garbage collection */ -static struct fib6_gc_args +struct fib6_gc_args { int timeout; int more; -} gc_args; +}; static int fib6_age(struct rt6_info *rt, void *arg) { + struct fib6_gc_args *gc_args = arg; unsigned long now = jiffies; /* @@ -1748,10 +1749,10 @@ static int fib6_age(struct rt6_info *rt, void *arg) RT6_TRACE("expiring %p\n", rt); return -1; } - gc_args.more++; + gc_args->more++; } else if (rt->rt6i_flags & RTF_CACHE) { if (atomic_read(&rt->dst.__refcnt) == 0 && - time_after_eq(now, rt->dst.lastuse + gc_args.timeout)) { + time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) { RT6_TRACE("aging clone %p\n", rt); return -1; } else if (rt->rt6i_flags & RTF_GATEWAY) { @@ -1769,21 +1770,20 @@ static int fib6_age(struct rt6_info *rt, void *arg) return -1; } } - gc_args.more++; + gc_args->more++; } return 0; } -static DEFINE_SPINLOCK(fib6_gc_lock); - void fib6_run_gc(unsigned long expires, struct net *net, bool force) { + struct fib6_gc_args gc_args; unsigned long now; if (force) { - spin_lock_bh(&fib6_gc_lock); - } else if (!spin_trylock_bh(&fib6_gc_lock)) { + spin_lock_bh(&net->ipv6.fib6_gc_lock); + } else if (!spin_trylock_bh(&net->ipv6.fib6_gc_lock)) { mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ); return; } @@ -1792,7 +1792,7 @@ void fib6_run_gc(unsigned long expires, struct net *net, bool force) gc_args.more = icmp6_dst_gc(); - fib6_clean_all(net, fib6_age, NULL); + fib6_clean_all(net, fib6_age, &gc_args); now = jiffies; net->ipv6.ip6_rt_last_gc = now; @@ -1802,7 +1802,7 @@ void fib6_run_gc(unsigned long expires, struct net *net, bool force) + net->ipv6.sysctl.ip6_rt_gc_interval)); else del_timer(&net->ipv6.ip6_fib_timer); - spin_unlock_bh(&fib6_gc_lock); + spin_unlock_bh(&net->ipv6.fib6_gc_lock); } static void fib6_gc_timer_cb(unsigned long arg) @@ -1814,6 +1814,9 @@ static int __net_init fib6_net_init(struct net *net) { size_t size = sizeof(struct hlist_head) * FIB6_TABLE_HASHSZ; + spin_lock_init(&net->ipv6.fib6_gc_lock); + rwlock_init(&net->ipv6.fib6_walker_lock); + INIT_LIST_HEAD(&net->ipv6.fib6_walkers); setup_timer(&net->ipv6.ip6_fib_timer, fib6_gc_timer_cb, (unsigned long)net); net->ipv6.rt6_stats = kzalloc(sizeof(*net->ipv6.rt6_stats), GFP_KERNEL); @@ -1974,7 +1977,8 @@ static int ipv6_route_yield(struct fib6_walker *w) return 0; } -static void ipv6_route_seq_setup_walk(struct ipv6_route_iter *iter) +static void ipv6_route_seq_setup_walk(struct ipv6_route_iter *iter, + struct net *net) { memset(&iter->w, 0, sizeof(iter->w)); iter->w.func = ipv6_route_yield; @@ -1984,7 +1988,7 @@ static void ipv6_route_seq_setup_walk(struct ipv6_route_iter *iter) iter->w.args = iter; iter->sernum = iter->w.root->fn_sernum; INIT_LIST_HEAD(&iter->w.lh); - fib6_walker_link(&iter->w); + fib6_walker_link(net, &iter->w); } static struct fib6_table *ipv6_route_seq_next_table(struct fib6_table *tbl, @@ -2045,16 +2049,16 @@ iter_table: ++*pos; return iter->w.leaf; } else if (r < 0) { - fib6_walker_unlink(&iter->w); + fib6_walker_unlink(net, &iter->w); return NULL; } - fib6_walker_unlink(&iter->w); + fib6_walker_unlink(net, &iter->w); iter->tbl = ipv6_route_seq_next_table(iter->tbl, net); if (!iter->tbl) return NULL; - ipv6_route_seq_setup_walk(iter); + ipv6_route_seq_setup_walk(iter, net); goto iter_table; } @@ -2069,7 +2073,7 @@ static void *ipv6_route_seq_start(struct seq_file *seq, loff_t *pos) iter->skip = *pos; if (iter->tbl) { - ipv6_route_seq_setup_walk(iter); + ipv6_route_seq_setup_walk(iter, net); return ipv6_route_seq_next(seq, NULL, pos); } else { return NULL; @@ -2085,10 +2089,11 @@ static bool ipv6_route_iter_active(struct ipv6_route_iter *iter) static void ipv6_route_seq_stop(struct seq_file *seq, void *v) __releases(RCU_BH) { + struct net *net = seq_file_net(seq); struct ipv6_route_iter *iter = seq->private; if (ipv6_route_iter_active(iter)) - fib6_walker_unlink(&iter->w); + fib6_walker_unlink(net, &iter->w); rcu_read_unlock_bh(); } diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index c0d4dc1c5..4e636e60a 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -360,7 +360,7 @@ static void ip6gre_tunnel_uninit(struct net_device *dev) struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id); ip6gre_tunnel_unlink(ign, t); - ip6_tnl_dst_reset(t); + dst_cache_reset(&t->dst_cache); dev_put(dev); } @@ -633,7 +633,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, } if (!fl6->flowi6_mark) - dst = ip6_tnl_dst_get(tunnel); + dst = dst_cache_get(&tunnel->dst_cache); if (!dst) { dst = ip6_route_output(net, NULL, fl6); @@ -702,7 +702,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, } if (!fl6->flowi6_mark && ndst) - ip6_tnl_dst_set(tunnel, ndst); + dst_cache_set_ip6(&tunnel->dst_cache, ndst, &fl6->saddr); skb_dst_set(skb, dst); proto = NEXTHDR_GRE; @@ -1011,7 +1011,7 @@ static int ip6gre_tnl_change(struct ip6_tnl *t, t->parms.o_key = p->o_key; t->parms.i_flags = p->i_flags; t->parms.o_flags = p->o_flags; - ip6_tnl_dst_reset(t); + dst_cache_reset(&t->dst_cache); ip6gre_tnl_link_config(t, set_mtu); return 0; } @@ -1221,7 +1221,7 @@ static void ip6gre_dev_free(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); - ip6_tnl_dst_destroy(t); + dst_cache_destroy(&t->dst_cache); free_percpu(dev->tstats); free_netdev(dev); } @@ -1259,7 +1259,7 @@ static int ip6gre_tunnel_init_common(struct net_device *dev) if (!dev->tstats) return -ENOMEM; - ret = ip6_tnl_dst_init(tunnel); + ret = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL); if (ret) { free_percpu(dev->tstats); dev->tstats = NULL; diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 9075acf08..c05c425c2 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -49,7 +49,7 @@ int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { - if (sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) { + if (net->ipv4.sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) { const struct inet6_protocol *ipprot; ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]); @@ -134,6 +134,16 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt IPV6_ADDR_MC_SCOPE(&hdr->daddr) == 1) goto err; + /* If enabled, drop unicast packets that were encapsulated in link-layer + * multicast or broadcast to protected against the so-called "hole-196" + * attack in 802.11 wireless. + */ + if (!ipv6_addr_is_multicast(&hdr->daddr) && + (skb->pkt_type == PACKET_BROADCAST || + skb->pkt_type == PACKET_MULTICAST) && + idev->cnf.drop_unicast_in_l2_multicast) + goto err; + /* RFC4291 2.7 * Nodes must not originate a packet to a multicast address whose scope * field contains the reserved value 0; if such a packet is received, it diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index eeca943f1..82e9f3076 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -258,6 +258,19 @@ out: return pp; } +static struct sk_buff **sit_gro_receive(struct sk_buff **head, + struct sk_buff *skb) +{ + if (NAPI_GRO_CB(skb)->encap_mark) { + NAPI_GRO_CB(skb)->flush = 1; + return NULL; + } + + NAPI_GRO_CB(skb)->encap_mark = 1; + + return ipv6_gro_receive(head, skb); +} + static int ipv6_gro_complete(struct sk_buff *skb, int nhoff) { const struct net_offload *ops; @@ -302,7 +315,7 @@ static struct packet_offload ipv6_packet_offload __read_mostly = { static const struct net_offload sit_offload = { .callbacks = { .gso_segment = ipv6_gso_segment, - .gro_receive = ipv6_gro_receive, + .gro_receive = sit_gro_receive, .gro_complete = sit_gro_complete, }, }; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 2a6606c93..bc972e715 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -332,7 +332,6 @@ static int ip6_forward_proxy_check(struct sk_buff *skb) static inline int ip6_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { - skb_sender_cpu_clear(skb); return dst_output(net, sk, skb); } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 3991b21e2..1f20345cb 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -122,97 +122,6 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev) return &dev->stats; } -/* - * Locking : hash tables are protected by RCU and RTNL - */ - -static void ip6_tnl_per_cpu_dst_set(struct ip6_tnl_dst *idst, - struct dst_entry *dst) -{ - write_seqlock_bh(&idst->lock); - dst_release(rcu_dereference_protected( - idst->dst, - lockdep_is_held(&idst->lock.lock))); - if (dst) { - dst_hold(dst); - idst->cookie = rt6_get_cookie((struct rt6_info *)dst); - } else { - idst->cookie = 0; - } - rcu_assign_pointer(idst->dst, dst); - write_sequnlock_bh(&idst->lock); -} - -struct dst_entry *ip6_tnl_dst_get(struct ip6_tnl *t) -{ - struct ip6_tnl_dst *idst; - struct dst_entry *dst; - unsigned int seq; - u32 cookie; - - idst = raw_cpu_ptr(t->dst_cache); - - rcu_read_lock(); - do { - seq = read_seqbegin(&idst->lock); - dst = rcu_dereference(idst->dst); - cookie = idst->cookie; - } while (read_seqretry(&idst->lock, seq)); - - if (dst && !atomic_inc_not_zero(&dst->__refcnt)) - dst = NULL; - rcu_read_unlock(); - - if (dst && dst->obsolete && !dst->ops->check(dst, cookie)) { - ip6_tnl_per_cpu_dst_set(idst, NULL); - dst_release(dst); - dst = NULL; - } - return dst; -} -EXPORT_SYMBOL_GPL(ip6_tnl_dst_get); - -void ip6_tnl_dst_reset(struct ip6_tnl *t) -{ - int i; - - for_each_possible_cpu(i) - ip6_tnl_per_cpu_dst_set(per_cpu_ptr(t->dst_cache, i), NULL); -} -EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset); - -void ip6_tnl_dst_set(struct ip6_tnl *t, struct dst_entry *dst) -{ - ip6_tnl_per_cpu_dst_set(raw_cpu_ptr(t->dst_cache), dst); - -} -EXPORT_SYMBOL_GPL(ip6_tnl_dst_set); - -void ip6_tnl_dst_destroy(struct ip6_tnl *t) -{ - if (!t->dst_cache) - return; - - ip6_tnl_dst_reset(t); - free_percpu(t->dst_cache); -} -EXPORT_SYMBOL_GPL(ip6_tnl_dst_destroy); - -int ip6_tnl_dst_init(struct ip6_tnl *t) -{ - int i; - - t->dst_cache = alloc_percpu(struct ip6_tnl_dst); - if (!t->dst_cache) - return -ENOMEM; - - for_each_possible_cpu(i) - seqlock_init(&per_cpu_ptr(t->dst_cache, i)->lock); - - return 0; -} -EXPORT_SYMBOL_GPL(ip6_tnl_dst_init); - /** * ip6_tnl_lookup - fetch tunnel matching the end-point addresses * @remote: the address of the tunnel exit-point @@ -329,7 +238,7 @@ static void ip6_dev_free(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); - ip6_tnl_dst_destroy(t); + dst_cache_destroy(&t->dst_cache); free_percpu(dev->tstats); free_netdev(dev); } @@ -462,7 +371,7 @@ ip6_tnl_dev_uninit(struct net_device *dev) RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL); else ip6_tnl_unlink(ip6n, t); - ip6_tnl_dst_reset(t); + dst_cache_reset(&t->dst_cache); dev_put(dev); } @@ -1069,7 +978,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr)); neigh_release(neigh); } else if (!fl6->flowi6_mark) - dst = ip6_tnl_dst_get(t); + dst = dst_cache_get(&t->dst_cache); if (!ip6_tnl_xmit_ctl(t, &fl6->saddr, &fl6->daddr)) goto tx_err_link_failure; @@ -1133,7 +1042,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, } if (!fl6->flowi6_mark && ndst) - ip6_tnl_dst_set(t, ndst); + dst_cache_set_ip6(&t->dst_cache, ndst, &fl6->saddr); skb_dst_set(skb, dst); skb->transport_header = skb->network_header; @@ -1368,7 +1277,7 @@ ip6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p) t->parms.flowinfo = p->flowinfo; t->parms.link = p->link; t->parms.proto = p->proto; - ip6_tnl_dst_reset(t); + dst_cache_reset(&t->dst_cache); ip6_tnl_link_config(t); return 0; } @@ -1639,7 +1548,7 @@ ip6_tnl_dev_init_gen(struct net_device *dev) if (!dev->tstats) return -ENOMEM; - ret = ip6_tnl_dst_init(t); + ret = dst_cache_init(&t->dst_cache, GFP_KERNEL); if (ret) { free_percpu(dev->tstats); dev->tstats = NULL; diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c index 14dacf1df..a7520528e 100644 --- a/net/ipv6/ip6_udp_tunnel.c +++ b/net/ipv6/ip6_udp_tunnel.c @@ -73,8 +73,8 @@ int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, struct net_device *dev, struct in6_addr *saddr, struct in6_addr *daddr, - __u8 prio, __u8 ttl, __be16 src_port, - __be16 dst_port, bool nocheck) + __u8 prio, __u8 ttl, __be32 label, + __be16 src_port, __be16 dst_port, bool nocheck) { struct udphdr *uh; struct ipv6hdr *ip6h; @@ -98,7 +98,7 @@ int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk, __skb_push(skb, sizeof(*ip6h)); skb_reset_network_header(skb); ip6h = ipv6_hdr(skb); - ip6_flow_hdr(ip6h, prio, htonl(0)); + ip6_flow_hdr(ip6h, prio, label); ip6h->payload_len = htons(skb->len); ip6h->nexthdr = IPPROTO_UDP; ip6h->hop_limit = ttl; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 0a8610b33..d90a11f14 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -640,7 +640,7 @@ vti6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p) t->parms.i_key = p->i_key; t->parms.o_key = p->o_key; t->parms.proto = p->proto; - ip6_tnl_dst_reset(t); + dst_cache_reset(&t->dst_cache); vti6_link_config(t); return 0; } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 84afb9a77..c245895a3 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -883,6 +883,7 @@ static void ndisc_recv_na(struct sk_buff *skb) offsetof(struct nd_msg, opt)); struct ndisc_options ndopts; struct net_device *dev = skb->dev; + struct inet6_dev *idev = __in6_dev_get(dev); struct inet6_ifaddr *ifp; struct neighbour *neigh; @@ -902,6 +903,14 @@ static void ndisc_recv_na(struct sk_buff *skb) return; } + /* For some 802.11 wireless deployments (and possibly other networks), + * there will be a NA proxy and unsolicitd packets are attacks + * and thus should not be accepted. + */ + if (!msg->icmph.icmp6_solicited && idev && + idev->cnf.drop_unsolicited_na) + return; + if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) { ND_PRINTK(2, warn, "NS: invalid ND option\n"); return; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 99425cf28..86b67b70b 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -198,11 +198,12 @@ get_entry(const void *base, unsigned int offset) /* All zeroes == unconditional rule. */ /* Mildly perf critical (only if packet tracing is on) */ -static inline bool unconditional(const struct ip6t_ip6 *ipv6) +static inline bool unconditional(const struct ip6t_entry *e) { static const struct ip6t_ip6 uncond; - return memcmp(ipv6, &uncond, sizeof(uncond)) == 0; + return e->target_offset == sizeof(struct ip6t_entry) && + memcmp(&e->ipv6, &uncond, sizeof(uncond)) == 0; } static inline const struct xt_entry_target * @@ -258,11 +259,10 @@ get_chainname_rulenum(const struct ip6t_entry *s, const struct ip6t_entry *e, } else if (s == e) { (*rulenum)++; - if (s->target_offset == sizeof(struct ip6t_entry) && + if (unconditional(s) && strcmp(t->target.u.kernel.target->name, XT_STANDARD_TARGET) == 0 && - t->verdict < 0 && - unconditional(&s->ipv6)) { + t->verdict < 0) { /* Tail of chains: STANDARD target (return/policy) */ *comment = *chainname == hookname ? comments[NF_IP6_TRACE_COMMENT_POLICY] @@ -488,11 +488,10 @@ mark_source_chains(const struct xt_table_info *newinfo, e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS)); /* Unconditional return/END. */ - if ((e->target_offset == sizeof(struct ip6t_entry) && + if ((unconditional(e) && (strcmp(t->target.u.user.name, XT_STANDARD_TARGET) == 0) && - t->verdict < 0 && - unconditional(&e->ipv6)) || visited) { + t->verdict < 0) || visited) { unsigned int oldpos, size; if ((strcmp(t->target.u.user.name, @@ -581,14 +580,12 @@ static void cleanup_match(struct xt_entry_match *m, struct net *net) } static int -check_entry(const struct ip6t_entry *e, const char *name) +check_entry(const struct ip6t_entry *e) { const struct xt_entry_target *t; - if (!ip6_checkentry(&e->ipv6)) { - duprintf("ip_tables: ip check failed %p %s.\n", e, name); + if (!ip6_checkentry(&e->ipv6)) return -EINVAL; - } if (e->target_offset + sizeof(struct xt_entry_target) > e->next_offset) @@ -679,10 +676,6 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, struct xt_mtchk_param mtpar; struct xt_entry_match *ematch; - ret = check_entry(e, name); - if (ret) - return ret; - e->counters.pcnt = xt_percpu_counter_alloc(); if (IS_ERR_VALUE(e->counters.pcnt)) return -ENOMEM; @@ -733,7 +726,7 @@ static bool check_underflow(const struct ip6t_entry *e) const struct xt_entry_target *t; unsigned int verdict; - if (!unconditional(&e->ipv6)) + if (!unconditional(e)) return false; t = ip6t_get_target_c(e); if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) @@ -753,9 +746,11 @@ check_entry_size_and_hooks(struct ip6t_entry *e, unsigned int valid_hooks) { unsigned int h; + int err; if ((unsigned long)e % __alignof__(struct ip6t_entry) != 0 || - (unsigned char *)e + sizeof(struct ip6t_entry) >= limit) { + (unsigned char *)e + sizeof(struct ip6t_entry) >= limit || + (unsigned char *)e + e->next_offset > limit) { duprintf("Bad offset %p\n", e); return -EINVAL; } @@ -767,6 +762,10 @@ check_entry_size_and_hooks(struct ip6t_entry *e, return -EINVAL; } + err = check_entry(e); + if (err) + return err; + /* Check hooks & underflows */ for (h = 0; h < NF_INET_NUMHOOKS; h++) { if (!(valid_hooks & (1 << h))) @@ -775,9 +774,9 @@ check_entry_size_and_hooks(struct ip6t_entry *e, newinfo->hook_entry[h] = hook_entries[h]; if ((unsigned char *)e - base == underflows[h]) { if (!check_underflow(e)) { - pr_err("Underflows must be unconditional and " - "use the STANDARD target with " - "ACCEPT/DROP\n"); + pr_debug("Underflows must be unconditional and " + "use the STANDARD target with " + "ACCEPT/DROP\n"); return -EINVAL; } newinfo->underflow[h] = underflows[h]; @@ -1169,6 +1168,7 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr, *len, sizeof(get) + get.size); return -EINVAL; } + get.name[sizeof(get.name) - 1] = '\0'; t = xt_find_table_lock(net, AF_INET6, get.name); if (!IS_ERR_OR_NULL(t)) { @@ -1505,7 +1505,8 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, duprintf("check_compat_entry_size_and_hooks %p\n", e); if ((unsigned long)e % __alignof__(struct compat_ip6t_entry) != 0 || - (unsigned char *)e + sizeof(struct compat_ip6t_entry) >= limit) { + (unsigned char *)e + sizeof(struct compat_ip6t_entry) >= limit || + (unsigned char *)e + e->next_offset > limit) { duprintf("Bad offset %p, limit = %p\n", e, limit); return -EINVAL; } @@ -1518,7 +1519,7 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, } /* For purposes of check_entry casting the compat entry is fine */ - ret = check_entry((struct ip6t_entry *)e, name); + ret = check_entry((struct ip6t_entry *)e); if (ret) return ret; @@ -1944,6 +1945,7 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr, *len, sizeof(get) + get.size); return -EINVAL; } + get.name[sizeof(get.name) - 1] = '\0'; xt_compat_lock(AF_INET6); t = xt_find_table_lock(net, AF_INET6, get.name); @@ -2071,9 +2073,28 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) return ret; } -struct xt_table *ip6t_register_table(struct net *net, - const struct xt_table *table, - const struct ip6t_replace *repl) +static void __ip6t_unregister_table(struct net *net, struct xt_table *table) +{ + struct xt_table_info *private; + void *loc_cpu_entry; + struct module *table_owner = table->me; + struct ip6t_entry *iter; + + private = xt_unregister_table(table); + + /* Decrease module usage counts and free resources */ + loc_cpu_entry = private->entries; + xt_entry_foreach(iter, loc_cpu_entry, private->size) + cleanup_entry(iter, net); + if (private->number > private->initial_entries) + module_put(table_owner); + xt_free_table_info(private); +} + +int ip6t_register_table(struct net *net, const struct xt_table *table, + const struct ip6t_replace *repl, + const struct nf_hook_ops *ops, + struct xt_table **res) { int ret; struct xt_table_info *newinfo; @@ -2082,10 +2103,8 @@ struct xt_table *ip6t_register_table(struct net *net, struct xt_table *new_table; newinfo = xt_alloc_table_info(repl->size); - if (!newinfo) { - ret = -ENOMEM; - goto out; - } + if (!newinfo) + return -ENOMEM; loc_cpu_entry = newinfo->entries; memcpy(loc_cpu_entry, repl->entries, repl->size); @@ -2099,30 +2118,28 @@ struct xt_table *ip6t_register_table(struct net *net, ret = PTR_ERR(new_table); goto out_free; } - return new_table; + + /* set res now, will see skbs right after nf_register_net_hooks */ + WRITE_ONCE(*res, new_table); + + ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks)); + if (ret != 0) { + __ip6t_unregister_table(net, new_table); + *res = NULL; + } + + return ret; out_free: xt_free_table_info(newinfo); -out: - return ERR_PTR(ret); + return ret; } -void ip6t_unregister_table(struct net *net, struct xt_table *table) +void ip6t_unregister_table(struct net *net, struct xt_table *table, + const struct nf_hook_ops *ops) { - struct xt_table_info *private; - void *loc_cpu_entry; - struct module *table_owner = table->me; - struct ip6t_entry *iter; - - private = xt_unregister_table(table); - - /* Decrease module usage counts and free resources */ - loc_cpu_entry = private->entries; - xt_entry_foreach(iter, loc_cpu_entry, private->size) - cleanup_entry(iter, net); - if (private->number > private->initial_entries) - module_put(table_owner); - xt_free_table_info(private); + nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks)); + __ip6t_unregister_table(net, table); } /* Returns 1 if the type and code is matched by the range, 0 otherwise */ diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 8b277b983..1343077dd 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -22,12 +22,15 @@ MODULE_DESCRIPTION("ip6tables filter table"); (1 << NF_INET_FORWARD) | \ (1 << NF_INET_LOCAL_OUT)) +static int __net_init ip6table_filter_table_init(struct net *net); + static const struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV6, .priority = NF_IP6_PRI_FILTER, + .table_init = ip6table_filter_table_init, }; /* The work comes in here from netfilter.c. */ @@ -44,9 +47,13 @@ static struct nf_hook_ops *filter_ops __read_mostly; static bool forward = true; module_param(forward, bool, 0000); -static int __net_init ip6table_filter_net_init(struct net *net) +static int __net_init ip6table_filter_table_init(struct net *net) { struct ip6t_replace *repl; + int err; + + if (net->ipv6.ip6table_filter) + return 0; repl = ip6t_alloc_initial_table(&packet_filter); if (repl == NULL) @@ -55,15 +62,26 @@ static int __net_init ip6table_filter_net_init(struct net *net) ((struct ip6t_standard *)repl->entries)[1].target.verdict = forward ? -NF_ACCEPT - 1 : -NF_DROP - 1; - net->ipv6.ip6table_filter = - ip6t_register_table(net, &packet_filter, repl); + err = ip6t_register_table(net, &packet_filter, repl, filter_ops, + &net->ipv6.ip6table_filter); kfree(repl); - return PTR_ERR_OR_ZERO(net->ipv6.ip6table_filter); + return err; +} + +static int __net_init ip6table_filter_net_init(struct net *net) +{ + if (net == &init_net || !forward) + return ip6table_filter_table_init(net); + + return 0; } static void __net_exit ip6table_filter_net_exit(struct net *net) { - ip6t_unregister_table(net, net->ipv6.ip6table_filter); + if (!net->ipv6.ip6table_filter) + return; + ip6t_unregister_table(net, net->ipv6.ip6table_filter, filter_ops); + net->ipv6.ip6table_filter = NULL; } static struct pernet_operations ip6table_filter_net_ops = { @@ -75,28 +93,21 @@ static int __init ip6table_filter_init(void) { int ret; + filter_ops = xt_hook_ops_alloc(&packet_filter, ip6table_filter_hook); + if (IS_ERR(filter_ops)) + return PTR_ERR(filter_ops); + ret = register_pernet_subsys(&ip6table_filter_net_ops); if (ret < 0) - return ret; - - /* Register hooks */ - filter_ops = xt_hook_link(&packet_filter, ip6table_filter_hook); - if (IS_ERR(filter_ops)) { - ret = PTR_ERR(filter_ops); - goto cleanup_table; - } + kfree(filter_ops); return ret; - - cleanup_table: - unregister_pernet_subsys(&ip6table_filter_net_ops); - return ret; } static void __exit ip6table_filter_fini(void) { - xt_hook_unlink(&packet_filter, filter_ops); unregister_pernet_subsys(&ip6table_filter_net_ops); + kfree(filter_ops); } module_init(ip6table_filter_init); diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index abe278b07..cb2b28883 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -23,12 +23,15 @@ MODULE_DESCRIPTION("ip6tables mangle table"); (1 << NF_INET_LOCAL_OUT) | \ (1 << NF_INET_POST_ROUTING)) +static int __net_init ip6table_mangle_table_init(struct net *net); + static const struct xt_table packet_mangler = { .name = "mangle", .valid_hooks = MANGLE_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV6, .priority = NF_IP6_PRI_MANGLE, + .table_init = ip6table_mangle_table_init, }; static unsigned int @@ -88,26 +91,33 @@ ip6table_mangle_hook(void *priv, struct sk_buff *skb, } static struct nf_hook_ops *mangle_ops __read_mostly; -static int __net_init ip6table_mangle_net_init(struct net *net) +static int __net_init ip6table_mangle_table_init(struct net *net) { struct ip6t_replace *repl; + int ret; + + if (net->ipv6.ip6table_mangle) + return 0; repl = ip6t_alloc_initial_table(&packet_mangler); if (repl == NULL) return -ENOMEM; - net->ipv6.ip6table_mangle = - ip6t_register_table(net, &packet_mangler, repl); + ret = ip6t_register_table(net, &packet_mangler, repl, mangle_ops, + &net->ipv6.ip6table_mangle); kfree(repl); - return PTR_ERR_OR_ZERO(net->ipv6.ip6table_mangle); + return ret; } static void __net_exit ip6table_mangle_net_exit(struct net *net) { - ip6t_unregister_table(net, net->ipv6.ip6table_mangle); + if (!net->ipv6.ip6table_mangle) + return; + + ip6t_unregister_table(net, net->ipv6.ip6table_mangle, mangle_ops); + net->ipv6.ip6table_mangle = NULL; } static struct pernet_operations ip6table_mangle_net_ops = { - .init = ip6table_mangle_net_init, .exit = ip6table_mangle_net_exit, }; @@ -115,28 +125,28 @@ static int __init ip6table_mangle_init(void) { int ret; + mangle_ops = xt_hook_ops_alloc(&packet_mangler, ip6table_mangle_hook); + if (IS_ERR(mangle_ops)) + return PTR_ERR(mangle_ops); + ret = register_pernet_subsys(&ip6table_mangle_net_ops); - if (ret < 0) + if (ret < 0) { + kfree(mangle_ops); return ret; - - /* Register hooks */ - mangle_ops = xt_hook_link(&packet_mangler, ip6table_mangle_hook); - if (IS_ERR(mangle_ops)) { - ret = PTR_ERR(mangle_ops); - goto cleanup_table; } - return ret; - - cleanup_table: - unregister_pernet_subsys(&ip6table_mangle_net_ops); + ret = ip6table_mangle_table_init(&init_net); + if (ret) { + unregister_pernet_subsys(&ip6table_mangle_net_ops); + kfree(mangle_ops); + } return ret; } static void __exit ip6table_mangle_fini(void) { - xt_hook_unlink(&packet_mangler, mangle_ops); unregister_pernet_subsys(&ip6table_mangle_net_ops); + kfree(mangle_ops); } module_init(ip6table_mangle_init); diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index de2a10a56..7d2bd9402 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -20,6 +20,8 @@ #include <net/netfilter/nf_nat_core.h> #include <net/netfilter/nf_nat_l3proto.h> +static int __net_init ip6table_nat_table_init(struct net *net); + static const struct xt_table nf_nat_ipv6_table = { .name = "nat", .valid_hooks = (1 << NF_INET_PRE_ROUTING) | @@ -28,6 +30,7 @@ static const struct xt_table nf_nat_ipv6_table = { (1 << NF_INET_LOCAL_IN), .me = THIS_MODULE, .af = NFPROTO_IPV6, + .table_init = ip6table_nat_table_init, }; static unsigned int ip6table_nat_do_chain(void *priv, @@ -97,50 +100,50 @@ static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = { }, }; -static int __net_init ip6table_nat_net_init(struct net *net) +static int __net_init ip6table_nat_table_init(struct net *net) { struct ip6t_replace *repl; + int ret; + + if (net->ipv6.ip6table_nat) + return 0; repl = ip6t_alloc_initial_table(&nf_nat_ipv6_table); if (repl == NULL) return -ENOMEM; - net->ipv6.ip6table_nat = ip6t_register_table(net, &nf_nat_ipv6_table, repl); + ret = ip6t_register_table(net, &nf_nat_ipv6_table, repl, + nf_nat_ipv6_ops, &net->ipv6.ip6table_nat); kfree(repl); - return PTR_ERR_OR_ZERO(net->ipv6.ip6table_nat); + return ret; } static void __net_exit ip6table_nat_net_exit(struct net *net) { - ip6t_unregister_table(net, net->ipv6.ip6table_nat); + if (!net->ipv6.ip6table_nat) + return; + ip6t_unregister_table(net, net->ipv6.ip6table_nat, nf_nat_ipv6_ops); + net->ipv6.ip6table_nat = NULL; } static struct pernet_operations ip6table_nat_net_ops = { - .init = ip6table_nat_net_init, .exit = ip6table_nat_net_exit, }; static int __init ip6table_nat_init(void) { - int err; + int ret = register_pernet_subsys(&ip6table_nat_net_ops); - err = register_pernet_subsys(&ip6table_nat_net_ops); - if (err < 0) - goto err1; + if (ret) + return ret; - err = nf_register_hooks(nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops)); - if (err < 0) - goto err2; - return 0; - -err2: - unregister_pernet_subsys(&ip6table_nat_net_ops); -err1: - return err; + ret = ip6table_nat_table_init(&init_net); + if (ret) + unregister_pernet_subsys(&ip6table_nat_net_ops); + return ret; } static void __exit ip6table_nat_exit(void) { - nf_unregister_hooks(nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops)); unregister_pernet_subsys(&ip6table_nat_net_ops); } diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 902196356..d4bc56443 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -9,12 +9,15 @@ #define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT)) +static int __net_init ip6table_raw_table_init(struct net *net); + static const struct xt_table packet_raw = { .name = "raw", .valid_hooks = RAW_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV6, .priority = NF_IP6_PRI_RAW, + .table_init = ip6table_raw_table_init, }; /* The work comes in here from netfilter.c. */ @@ -27,26 +30,32 @@ ip6table_raw_hook(void *priv, struct sk_buff *skb, static struct nf_hook_ops *rawtable_ops __read_mostly; -static int __net_init ip6table_raw_net_init(struct net *net) +static int __net_init ip6table_raw_table_init(struct net *net) { struct ip6t_replace *repl; + int ret; + + if (net->ipv6.ip6table_raw) + return 0; repl = ip6t_alloc_initial_table(&packet_raw); if (repl == NULL) return -ENOMEM; - net->ipv6.ip6table_raw = - ip6t_register_table(net, &packet_raw, repl); + ret = ip6t_register_table(net, &packet_raw, repl, rawtable_ops, + &net->ipv6.ip6table_raw); kfree(repl); - return PTR_ERR_OR_ZERO(net->ipv6.ip6table_raw); + return ret; } static void __net_exit ip6table_raw_net_exit(struct net *net) { - ip6t_unregister_table(net, net->ipv6.ip6table_raw); + if (!net->ipv6.ip6table_raw) + return; + ip6t_unregister_table(net, net->ipv6.ip6table_raw, rawtable_ops); + net->ipv6.ip6table_raw = NULL; } static struct pernet_operations ip6table_raw_net_ops = { - .init = ip6table_raw_net_init, .exit = ip6table_raw_net_exit, }; @@ -54,28 +63,29 @@ static int __init ip6table_raw_init(void) { int ret; + /* Register hooks */ + rawtable_ops = xt_hook_ops_alloc(&packet_raw, ip6table_raw_hook); + if (IS_ERR(rawtable_ops)) + return PTR_ERR(rawtable_ops); + ret = register_pernet_subsys(&ip6table_raw_net_ops); - if (ret < 0) + if (ret < 0) { + kfree(rawtable_ops); return ret; - - /* Register hooks */ - rawtable_ops = xt_hook_link(&packet_raw, ip6table_raw_hook); - if (IS_ERR(rawtable_ops)) { - ret = PTR_ERR(rawtable_ops); - goto cleanup_table; } - return ret; - - cleanup_table: - unregister_pernet_subsys(&ip6table_raw_net_ops); + ret = ip6table_raw_table_init(&init_net); + if (ret) { + unregister_pernet_subsys(&ip6table_raw_net_ops); + kfree(rawtable_ops); + } return ret; } static void __exit ip6table_raw_fini(void) { - xt_hook_unlink(&packet_raw, rawtable_ops); unregister_pernet_subsys(&ip6table_raw_net_ops); + kfree(rawtable_ops); } module_init(ip6table_raw_init); diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index 0d856fedf..cf26ccb04 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -27,12 +27,15 @@ MODULE_DESCRIPTION("ip6tables security table, for MAC rules"); (1 << NF_INET_FORWARD) | \ (1 << NF_INET_LOCAL_OUT) +static int __net_init ip6table_security_table_init(struct net *net); + static const struct xt_table security_table = { .name = "security", .valid_hooks = SECURITY_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV6, .priority = NF_IP6_PRI_SECURITY, + .table_init = ip6table_security_table_init, }; static unsigned int @@ -44,26 +47,32 @@ ip6table_security_hook(void *priv, struct sk_buff *skb, static struct nf_hook_ops *sectbl_ops __read_mostly; -static int __net_init ip6table_security_net_init(struct net *net) +static int __net_init ip6table_security_table_init(struct net *net) { struct ip6t_replace *repl; + int ret; + + if (net->ipv6.ip6table_security) + return 0; repl = ip6t_alloc_initial_table(&security_table); if (repl == NULL) return -ENOMEM; - net->ipv6.ip6table_security = - ip6t_register_table(net, &security_table, repl); + ret = ip6t_register_table(net, &security_table, repl, sectbl_ops, + &net->ipv6.ip6table_security); kfree(repl); - return PTR_ERR_OR_ZERO(net->ipv6.ip6table_security); + return ret; } static void __net_exit ip6table_security_net_exit(struct net *net) { - ip6t_unregister_table(net, net->ipv6.ip6table_security); + if (!net->ipv6.ip6table_security) + return; + ip6t_unregister_table(net, net->ipv6.ip6table_security, sectbl_ops); + net->ipv6.ip6table_security = NULL; } static struct pernet_operations ip6table_security_net_ops = { - .init = ip6table_security_net_init, .exit = ip6table_security_net_exit, }; @@ -71,27 +80,28 @@ static int __init ip6table_security_init(void) { int ret; + sectbl_ops = xt_hook_ops_alloc(&security_table, ip6table_security_hook); + if (IS_ERR(sectbl_ops)) + return PTR_ERR(sectbl_ops); + ret = register_pernet_subsys(&ip6table_security_net_ops); - if (ret < 0) + if (ret < 0) { + kfree(sectbl_ops); return ret; - - sectbl_ops = xt_hook_link(&security_table, ip6table_security_hook); - if (IS_ERR(sectbl_ops)) { - ret = PTR_ERR(sectbl_ops); - goto cleanup_table; } - return ret; - -cleanup_table: - unregister_pernet_subsys(&ip6table_security_net_ops); + ret = ip6table_security_table_init(&init_net); + if (ret) { + unregister_pernet_subsys(&ip6table_security_net_ops); + kfree(sectbl_ops); + } return ret; } static void __exit ip6table_security_fini(void) { - xt_hook_unlink(&security_table, sectbl_ops); unregister_pernet_subsys(&ip6table_security_net_ops); + kfree(sectbl_ops); } module_init(ip6table_security_init); diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c index 6ce309928..e0be97e63 100644 --- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c @@ -131,29 +131,15 @@ static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb, u8 proto, void *data, __sum16 *check, int datalen, int oldlen) { - const struct ipv6hdr *ipv6h = ipv6_hdr(skb); - struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); - if (skb->ip_summed != CHECKSUM_PARTIAL) { - if (!(rt->rt6i_flags & RTF_LOCAL) && - (!skb->dev || skb->dev->features & - (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))) { - skb->ip_summed = CHECKSUM_PARTIAL; - skb->csum_start = skb_headroom(skb) + - skb_network_offset(skb) + - (data - (void *)skb->data); - skb->csum_offset = (void *)check - data; - *check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, - datalen, proto, 0); - } else { - *check = 0; - *check = csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, - datalen, proto, - csum_partial(data, datalen, - 0)); - if (proto == IPPROTO_UDP && !*check) - *check = CSUM_MANGLED_0; - } + const struct ipv6hdr *ipv6h = ipv6_hdr(skb); + + skb->ip_summed = CHECKSUM_PARTIAL; + skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) + + (data - (void *)skb->data); + skb->csum_offset = (void *)check - data; + *check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, + datalen, proto, 0); } else inet_proto_csum_replace2(check, skb, htons(oldlen), htons(datalen), true); diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c b/net/ipv6/netfilter/nft_masq_ipv6.c index cd1ac1637..9597ffb74 100644 --- a/net/ipv6/netfilter/nft_masq_ipv6.c +++ b/net/ipv6/netfilter/nft_masq_ipv6.c @@ -26,7 +26,12 @@ static void nft_masq_ipv6_eval(const struct nft_expr *expr, memset(&range, 0, sizeof(range)); range.flags = priv->flags; - + if (priv->sreg_proto_min) { + range.min_proto.all = + *(__be16 *)®s->data[priv->sreg_proto_min]; + range.max_proto.all = + *(__be16 *)®s->data[priv->sreg_proto_max]; + } regs->verdict.code = nf_nat_masquerade_ipv6(pkt->skb, &range, pkt->out); } diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 263a5164a..c382db7a2 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -26,35 +26,6 @@ #include <net/transp_v6.h> #include <net/ping.h> -struct proto pingv6_prot = { - .name = "PINGv6", - .owner = THIS_MODULE, - .init = ping_init_sock, - .close = ping_close, - .connect = ip6_datagram_connect_v6_only, - .disconnect = udp_disconnect, - .setsockopt = ipv6_setsockopt, - .getsockopt = ipv6_getsockopt, - .sendmsg = ping_v6_sendmsg, - .recvmsg = ping_recvmsg, - .bind = ping_bind, - .backlog_rcv = ping_queue_rcv_skb, - .hash = ping_hash, - .unhash = ping_unhash, - .get_port = ping_get_port, - .obj_size = sizeof(struct raw6_sock), -}; -EXPORT_SYMBOL_GPL(pingv6_prot); - -static struct inet_protosw pingv6_protosw = { - .type = SOCK_DGRAM, - .protocol = IPPROTO_ICMPV6, - .prot = &pingv6_prot, - .ops = &inet6_dgram_ops, - .flags = INET_PROTOSW_REUSE, -}; - - /* Compatibility glue so we can support IPv6 when it's compiled as a module */ static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) @@ -77,7 +48,7 @@ static int dummy_ipv6_chk_addr(struct net *net, const struct in6_addr *addr, return 0; } -int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) +static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); @@ -192,6 +163,34 @@ int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) return len; } +struct proto pingv6_prot = { + .name = "PINGv6", + .owner = THIS_MODULE, + .init = ping_init_sock, + .close = ping_close, + .connect = ip6_datagram_connect_v6_only, + .disconnect = udp_disconnect, + .setsockopt = ipv6_setsockopt, + .getsockopt = ipv6_getsockopt, + .sendmsg = ping_v6_sendmsg, + .recvmsg = ping_recvmsg, + .bind = ping_bind, + .backlog_rcv = ping_queue_rcv_skb, + .hash = ping_hash, + .unhash = ping_unhash, + .get_port = ping_get_port, + .obj_size = sizeof(struct raw6_sock), +}; +EXPORT_SYMBOL_GPL(pingv6_prot); + +static struct inet_protosw pingv6_protosw = { + .type = SOCK_DGRAM, + .protocol = IPPROTO_ICMPV6, + .prot = &pingv6_prot, + .ops = &inet6_dgram_ops, + .flags = INET_PROTOSW_REUSE, +}; + #ifdef CONFIG_PROC_FS static void *ping_v6_seq_start(struct seq_file *seq, loff_t *pos) { diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 18f3498a6..e2ea31175 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -496,10 +496,8 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, IP6CB(head)->flags |= IP6SKB_FRAGMENTED; /* Yes, and fold redundant checksum back. 8) */ - if (head->ip_summed == CHECKSUM_COMPLETE) - head->csum = csum_partial(skb_network_header(head), - skb_network_header_len(head), - head->csum); + skb_postpush_rcsum(head, skb_network_header(head), + skb_network_header_len(head)); rcu_read_lock(); IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index ed4466392..6f32944e0 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -338,9 +338,9 @@ static struct rt6_info *__ip6_dst_alloc(struct net *net, return rt; } -static struct rt6_info *ip6_dst_alloc(struct net *net, - struct net_device *dev, - int flags) +struct rt6_info *ip6_dst_alloc(struct net *net, + struct net_device *dev, + int flags) { struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags); @@ -364,6 +364,7 @@ static struct rt6_info *ip6_dst_alloc(struct net *net, return rt; } +EXPORT_SYMBOL(ip6_dst_alloc); static void ip6_dst_destroy(struct dst_entry *dst) { @@ -1417,8 +1418,20 @@ EXPORT_SYMBOL_GPL(ip6_update_pmtu); void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu) { + struct dst_entry *dst; + ip6_update_pmtu(skb, sock_net(sk), mtu, sk->sk_bound_dev_if, sk->sk_mark); + + dst = __sk_dst_get(sk); + if (!dst || !dst->obsolete || + dst->ops->check(dst, inet6_sk(sk)->dst_cookie)) + return; + + bh_lock_sock(sk); + if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) + ip6_datagram_dst_update(sk, false); + bh_unlock_sock(sk); } EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu); @@ -1737,6 +1750,8 @@ static int ip6_convert_metrics(struct mx6_config *mxc, } else { val = nla_get_u32(nla); } + if (type == RTAX_HOPLIMIT && val > 255) + val = 255; if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK)) goto err; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 2066d1c25..83384308d 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -475,7 +475,7 @@ static void ipip6_tunnel_uninit(struct net_device *dev) ipip6_tunnel_unlink(sitn, tunnel); ipip6_tunnel_del_prl(tunnel, NULL); } - ip_tunnel_dst_reset_all(tunnel); + dst_cache_reset(&tunnel->dst_cache); dev_put(dev); } @@ -681,14 +681,16 @@ static int ipip6_rcv(struct sk_buff *skb) skb->mac_header = skb->network_header; skb_reset_network_header(skb); IPCB(skb)->flags = 0; - skb->protocol = htons(ETH_P_IPV6); + skb->dev = tunnel->dev; if (packet_is_spoofed(skb, iph, tunnel)) { tunnel->dev->stats.rx_errors++; goto out; } - __skb_tunnel_rx(skb, tunnel->dev, tunnel->net); + if (iptunnel_pull_header(skb, 0, htons(ETH_P_IPV6), + !net_eq(tunnel->net, dev_net(tunnel->dev)))) + goto out; err = IP_ECN_decapsulate(iph, skb); if (unlikely(err)) { @@ -740,7 +742,7 @@ static int ipip_rcv(struct sk_buff *skb) if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) goto drop; - if (iptunnel_pull_header(skb, 0, tpi.proto)) + if (iptunnel_pull_header(skb, 0, tpi.proto, false)) goto drop; return ip_tunnel_rcv(tunnel, skb, &tpi, NULL, log_ecn_error); } @@ -911,7 +913,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, goto tx_error; } - skb = iptunnel_handle_offloads(skb, false, SKB_GSO_SIT); + skb = iptunnel_handle_offloads(skb, SKB_GSO_SIT); if (IS_ERR(skb)) { ip_rt_put(rt); goto out; @@ -1000,7 +1002,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) struct ip_tunnel *tunnel = netdev_priv(dev); const struct iphdr *tiph = &tunnel->parms.iph; - skb = iptunnel_handle_offloads(skb, false, SKB_GSO_IPIP); + skb = iptunnel_handle_offloads(skb, SKB_GSO_IPIP); if (IS_ERR(skb)) goto out; @@ -1093,7 +1095,7 @@ static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p) t->parms.link = p->link; ipip6_tunnel_bind_dev(t->dev); } - ip_tunnel_dst_reset_all(t); + dst_cache_reset(&t->dst_cache); netdev_state_change(t->dev); } @@ -1124,7 +1126,7 @@ static int ipip6_tunnel_update_6rd(struct ip_tunnel *t, t->ip6rd.relay_prefix = relay_prefix; t->ip6rd.prefixlen = ip6rd->prefixlen; t->ip6rd.relay_prefixlen = ip6rd->relay_prefixlen; - ip_tunnel_dst_reset_all(t); + dst_cache_reset(&t->dst_cache); netdev_state_change(t->dev); return 0; } @@ -1278,7 +1280,7 @@ ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL); break; } - ip_tunnel_dst_reset_all(t); + dst_cache_reset(&t->dst_cache); netdev_state_change(dev); break; @@ -1339,7 +1341,7 @@ static void ipip6_dev_free(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); - free_percpu(tunnel->dst_cache); + dst_cache_destroy(&tunnel->dst_cache); free_percpu(dev->tstats); free_netdev(dev); } @@ -1372,6 +1374,7 @@ static void ipip6_tunnel_setup(struct net_device *dev) static int ipip6_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); + int err; tunnel->dev = dev; tunnel->net = dev_net(dev); @@ -1382,10 +1385,10 @@ static int ipip6_tunnel_init(struct net_device *dev) if (!dev->tstats) return -ENOMEM; - tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst); - if (!tunnel->dst_cache) { + err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL); + if (err) { free_percpu(dev->tstats); - return -ENOMEM; + return err; } return 0; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 2906ef207..aab91fa86 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -41,8 +41,7 @@ static __u16 const msstab[] = { 9000 - 60, }; -static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS], - ipv6_cookie_scratch); +static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS], ipv6_cookie_scratch); static u32 cookie_hash(const struct in6_addr *saddr, const struct in6_addr *daddr, __be16 sport, __be16 dport, u32 count, int c) @@ -148,7 +147,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) struct dst_entry *dst; __u8 rcv_wscale; - if (!sysctl_tcp_syncookies || !th->ack || th->rst) + if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst) goto out; if (tcp_synq_no_recent_overflow(sk)) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 62e167ec1..31959f7fa 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -67,7 +67,7 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> -#include <linux/crypto.h> +#include <crypto/hash.h> #include <linux/scatterlist.h> static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb); @@ -557,7 +557,8 @@ static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp, bp->len = cpu_to_be32(nbytes); sg_init_one(&sg, bp, sizeof(*bp)); - return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp)); + ahash_request_set_crypt(hp->md5_req, &sg, NULL, sizeof(*bp)); + return crypto_ahash_update(hp->md5_req); } static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key, @@ -565,14 +566,14 @@ static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key, const struct tcphdr *th) { struct tcp_md5sig_pool *hp; - struct hash_desc *desc; + struct ahash_request *req; hp = tcp_get_md5sig_pool(); if (!hp) goto clear_hash_noput; - desc = &hp->md5_desc; + req = hp->md5_req; - if (crypto_hash_init(desc)) + if (crypto_ahash_init(req)) goto clear_hash; if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2)) goto clear_hash; @@ -580,7 +581,8 @@ static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key, goto clear_hash; if (tcp_md5_hash_key(hp, key)) goto clear_hash; - if (crypto_hash_final(desc, md5_hash)) + ahash_request_set_crypt(req, NULL, md5_hash, 0); + if (crypto_ahash_final(req)) goto clear_hash; tcp_put_md5sig_pool(); @@ -600,7 +602,7 @@ static int tcp_v6_md5_hash_skb(char *md5_hash, { const struct in6_addr *saddr, *daddr; struct tcp_md5sig_pool *hp; - struct hash_desc *desc; + struct ahash_request *req; const struct tcphdr *th = tcp_hdr(skb); if (sk) { /* valid for establish/request sockets */ @@ -615,9 +617,9 @@ static int tcp_v6_md5_hash_skb(char *md5_hash, hp = tcp_get_md5sig_pool(); if (!hp) goto clear_hash_noput; - desc = &hp->md5_desc; + req = hp->md5_req; - if (crypto_hash_init(desc)) + if (crypto_ahash_init(req)) goto clear_hash; if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, skb->len)) @@ -628,7 +630,8 @@ static int tcp_v6_md5_hash_skb(char *md5_hash, goto clear_hash; if (tcp_md5_hash_key(hp, key)) goto clear_hash; - if (crypto_hash_final(desc, md5_hash)) + ahash_request_set_crypt(req, NULL, md5_hash, 0); + if (crypto_ahash_final(req)) goto clear_hash; tcp_put_md5sig_pool(); @@ -823,8 +826,13 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 fl6.flowi6_proto = IPPROTO_TCP; if (rt6_need_strict(&fl6.daddr) && !oif) fl6.flowi6_oif = tcp_v6_iif(skb); - else + else { + if (!oif && netif_index_is_l3_master(net, skb->skb_iif)) + oif = skb->skb_iif; + fl6.flowi6_oif = oif; + } + fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark); fl6.fl6_dport = t1->dest; fl6.fl6_sport = t1->source; @@ -883,7 +891,8 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) * no RST generated if md5 hash doesn't match. */ sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev), - &tcp_hashinfo, &ipv6h->saddr, + &tcp_hashinfo, NULL, 0, + &ipv6h->saddr, th->source, &ipv6h->daddr, ntohs(th->source), tcp_v6_iif(skb)); if (!sk1) @@ -1400,8 +1409,8 @@ static int tcp_v6_rcv(struct sk_buff *skb) hdr = ipv6_hdr(skb); lookup: - sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest, - inet6_iif(skb)); + sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), + th->source, th->dest, inet6_iif(skb)); if (!sk) goto no_tcp_socket; @@ -1466,7 +1475,7 @@ process: sk_incoming_cpu_update(sk); bh_lock_sock_nested(sk); - tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs); + tcp_segs_in(tcp_sk(sk), skb); ret = 0; if (!sock_owned_by_user(sk)) { if (!tcp_prequeue(sk, skb)) @@ -1525,6 +1534,7 @@ do_time_wait: struct sock *sk2; sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo, + skb, __tcp_hdrlen(th), &ipv6_hdr(skb)->saddr, th->source, &ipv6_hdr(skb)->daddr, ntohs(th->dest), tcp_v6_iif(skb)); @@ -1890,7 +1900,7 @@ struct proto tcpv6_prot = { .sendpage = tcp_sendpage, .backlog_rcv = tcp_v6_do_rcv, .release_cb = tcp_release_cb, - .hash = inet_hash, + .hash = inet6_hash, .unhash = inet_unhash, .get_port = inet_csk_get_port, .enter_memory_pressure = tcp_enter_memory_pressure, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 6794120f5..6bc5c664f 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -37,6 +37,7 @@ #include <linux/slab.h> #include <asm/uaccess.h> +#include <net/addrconf.h> #include <net/ndisc.h> #include <net/protocol.h> #include <net/transp_v6.h> @@ -77,49 +78,6 @@ static u32 udp6_ehashfn(const struct net *net, udp_ipv6_hash_secret + net_hash_mix(net)); } -/* match_wildcard == true: IPV6_ADDR_ANY equals to any IPv6 addresses if IPv6 - * only, and any IPv4 addresses if not IPv6 only - * match_wildcard == false: addresses must be exactly the same, i.e. - * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY, - * and 0.0.0.0 equals to 0.0.0.0 only - */ -int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, - bool match_wildcard) -{ - const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); - int sk2_ipv6only = inet_v6_ipv6only(sk2); - int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr); - int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED; - - /* if both are mapped, treat as IPv4 */ - if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) { - if (!sk2_ipv6only) { - if (sk->sk_rcv_saddr == sk2->sk_rcv_saddr) - return 1; - if (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr) - return match_wildcard; - } - return 0; - } - - if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY) - return 1; - - if (addr_type2 == IPV6_ADDR_ANY && match_wildcard && - !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED)) - return 1; - - if (addr_type == IPV6_ADDR_ANY && match_wildcard && - !(ipv6_only_sock(sk) && addr_type2 == IPV6_ADDR_MAPPED)) - return 1; - - if (sk2_rcv_saddr6 && - ipv6_addr_equal(&sk->sk_v6_rcv_saddr, sk2_rcv_saddr6)) - return 1; - - return 0; -} - static u32 udp6_portaddr_hash(const struct net *net, const struct in6_addr *addr6, unsigned int port) @@ -590,6 +548,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, const struct in6_addr *daddr = &hdr->daddr; struct udphdr *uh = (struct udphdr *)(skb->data+offset); struct sock *sk; + int harderr; int err; struct net *net = dev_net(skb->dev); @@ -601,26 +560,27 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return; } + harderr = icmpv6_err_convert(type, code, &err); + np = inet6_sk(sk); + if (type == ICMPV6_PKT_TOOBIG) { if (!ip6_sk_accept_pmtu(sk)) goto out; ip6_sk_update_pmtu(skb, sk, info); + if (np->pmtudisc != IPV6_PMTUDISC_DONT) + harderr = 1; } if (type == NDISC_REDIRECT) { ip6_sk_redirect(skb, sk); goto out; } - np = inet6_sk(sk); - - if (!icmpv6_err_convert(type, code, &err) && !np->recverr) - goto out; - - if (sk->sk_state != TCP_ESTABLISHED && !np->recverr) - goto out; - - if (np->recverr) + if (!np->recverr) { + if (!harderr || sk->sk_state != TCP_ESTABLISHED) + goto out; + } else { ipv6_icmp_error(sk, skb, err, uh->dest, ntohl(info), (u8 *)(uh+1)); + } sk->sk_err = err; sk->sk_error_report(sk); @@ -1579,6 +1539,7 @@ struct proto udpv6_prot = { .sendmsg = udpv6_sendmsg, .recvmsg = udpv6_recvmsg, .backlog_rcv = __udpv6_queue_rcv_skb, + .release_cb = ip6_datagram_release_cb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, .rehash = udp_v6_rehash, diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 7441e1e63..2b0fbe692 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -81,12 +81,18 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, csum = skb_checksum(skb, 0, skb->len, 0); uh->check = udp_v6_check(skb->len, &ipv6h->saddr, &ipv6h->daddr, csum); - if (uh->check == 0) uh->check = CSUM_MANGLED_0; skb->ip_summed = CHECKSUM_NONE; + /* If there is no outer header we can fake a checksum offload + * due to the fact that we have already done the checksum in + * software prior to segmenting the frame. + */ + if (!skb->encap_hdr_csum) + features |= NETIF_F_HW_CSUM; + /* Check if there is enough headroom to insert fragment header. */ tnl_hlen = skb_tnl_header_len(skb); if (skb->mac_header < (tnl_hlen + frag_hdr_sz)) { |