summaryrefslogtreecommitdiff
path: root/net/ipv6/route.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv6/route.c')
-rw-r--r--net/ipv6/route.c209
1 files changed, 137 insertions, 72 deletions
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index dd6ebba58..946880ad4 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -54,10 +54,13 @@
#include <net/tcp.h>
#include <linux/rtnetlink.h>
#include <net/dst.h>
+#include <net/dst_metadata.h>
#include <net/xfrm.h>
#include <net/netevent.h>
#include <net/netlink.h>
#include <net/nexthop.h>
+#include <net/lwtunnel.h>
+#include <net/ip_tunnels.h>
#include <asm/uaccess.h>
@@ -245,12 +248,6 @@ static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
{
}
-static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
- unsigned long old)
-{
- return NULL;
-}
-
static struct dst_ops ip6_dst_blackhole_ops = {
.family = AF_INET6,
.destroy = ip6_dst_destroy,
@@ -259,7 +256,7 @@ static struct dst_ops ip6_dst_blackhole_ops = {
.default_advmss = ip6_default_advmss,
.update_pmtu = ip6_rt_blackhole_update_pmtu,
.redirect = ip6_rt_blackhole_redirect,
- .cow_metrics = ip6_rt_blackhole_cow_metrics,
+ .cow_metrics = dst_cow_metrics_generic,
.neigh_lookup = ip6_neigh_lookup,
};
@@ -316,6 +313,15 @@ static const struct rt6_info ip6_blk_hole_entry_template = {
#endif
+static void rt6_info_init(struct rt6_info *rt)
+{
+ struct dst_entry *dst = &rt->dst;
+
+ memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
+ INIT_LIST_HEAD(&rt->rt6i_siblings);
+ INIT_LIST_HEAD(&rt->rt6i_uncached);
+}
+
/* allocate dst with ip6_dst_ops */
static struct rt6_info *__ip6_dst_alloc(struct net *net,
struct net_device *dev,
@@ -324,13 +330,9 @@ static struct rt6_info *__ip6_dst_alloc(struct net *net,
struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
0, DST_OBSOLETE_FORCE_CHK, flags);
- if (rt) {
- struct dst_entry *dst = &rt->dst;
+ if (rt)
+ rt6_info_init(rt);
- memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
- INIT_LIST_HEAD(&rt->rt6i_siblings);
- INIT_LIST_HEAD(&rt->rt6i_uncached);
- }
return rt;
}
@@ -536,13 +538,14 @@ static void rt6_probe_deferred(struct work_struct *w)
container_of(w, struct __rt6_probe_work, work);
addrconf_addr_solict_mult(&work->target, &mcaddr);
- ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL);
+ ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL, NULL);
dev_put(work->dev);
kfree(work);
}
static void rt6_probe(struct rt6_info *rt)
{
+ struct __rt6_probe_work *work;
struct neighbour *neigh;
/*
* Okay, this does not seem to be appropriate
@@ -557,34 +560,33 @@ static void rt6_probe(struct rt6_info *rt)
rcu_read_lock_bh();
neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
if (neigh) {
- write_lock(&neigh->lock);
if (neigh->nud_state & NUD_VALID)
goto out;
- }
-
- if (!neigh ||
- time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
- struct __rt6_probe_work *work;
+ work = NULL;
+ write_lock(&neigh->lock);
+ if (!(neigh->nud_state & NUD_VALID) &&
+ time_after(jiffies,
+ neigh->updated +
+ rt->rt6i_idev->cnf.rtr_probe_interval)) {
+ work = kmalloc(sizeof(*work), GFP_ATOMIC);
+ if (work)
+ __neigh_set_probe_once(neigh);
+ }
+ write_unlock(&neigh->lock);
+ } else {
work = kmalloc(sizeof(*work), GFP_ATOMIC);
+ }
- if (neigh && work)
- __neigh_set_probe_once(neigh);
-
- if (neigh)
- write_unlock(&neigh->lock);
+ if (work) {
+ INIT_WORK(&work->work, rt6_probe_deferred);
+ work->target = rt->rt6i_gateway;
+ dev_hold(rt->dst.dev);
+ work->dev = rt->dst.dev;
+ schedule_work(&work->work);
+ }
- if (work) {
- INIT_WORK(&work->work, rt6_probe_deferred);
- work->target = rt->rt6i_gateway;
- dev_hold(rt->dst.dev);
- work->dev = rt->dst.dev;
- schedule_work(&work->work);
- }
- } else {
out:
- write_unlock(&neigh->lock);
- }
rcu_read_unlock_bh();
}
#else
@@ -663,6 +665,12 @@ static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
{
int m;
bool match_do_rr = false;
+ struct inet6_dev *idev = rt->rt6i_idev;
+ struct net_device *dev = rt->dst.dev;
+
+ if (dev && !netif_carrier_ok(dev) &&
+ idev->cnf.ignore_routes_with_linkdown)
+ goto out;
if (rt6_check_expired(rt))
goto out;
@@ -1060,6 +1068,9 @@ static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
saved_fn = fn;
+ if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
+ oif = 0;
+
redo_rt6_select:
rt = rt6_select(fn, oif, strict);
if (rt->rt6i_nsiblings)
@@ -1155,6 +1166,7 @@ void ip6_route_input(struct sk_buff *skb)
const struct ipv6hdr *iph = ipv6_hdr(skb);
struct net *net = dev_net(skb->dev);
int flags = RT6_LOOKUP_F_HAS_SADDR;
+ struct ip_tunnel_info *tun_info;
struct flowi6 fl6 = {
.flowi6_iif = skb->dev->ifindex,
.daddr = iph->daddr,
@@ -1164,6 +1176,10 @@ void ip6_route_input(struct sk_buff *skb)
.flowi6_proto = iph->nexthdr,
};
+ tun_info = skb_tunnel_info(skb);
+ if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
+ fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
+ skb_dst_drop(skb);
skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
}
@@ -1177,13 +1193,16 @@ struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
struct flowi6 *fl6)
{
int flags = 0;
+ bool any_src;
fl6->flowi6_iif = LOOPBACK_IFINDEX;
- if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
+ any_src = ipv6_addr_any(&fl6->saddr);
+ if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
+ (fl6->flowi6_oif && any_src))
flags |= RT6_LOOKUP_F_IFACE;
- if (!ipv6_addr_any(&fl6->saddr))
+ if (!any_src)
flags |= RT6_LOOKUP_F_HAS_SADDR;
else if (sk)
flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
@@ -1199,24 +1218,20 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
if (rt) {
- new = &rt->dst;
-
- memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
+ rt6_info_init(rt);
+ new = &rt->dst;
new->__use = 1;
new->input = dst_discard;
new->output = dst_discard_sk;
- if (dst_metrics_read_only(&ort->dst))
- new->_metrics = ort->dst._metrics;
- else
- dst_copy_metrics(new, &ort->dst);
+ dst_copy_metrics(new, &ort->dst);
rt->rt6i_idev = ort->rt6i_idev;
if (rt->rt6i_idev)
in6_dev_hold(rt->rt6i_idev);
rt->rt6i_gateway = ort->rt6i_gateway;
- rt->rt6i_flags = ort->rt6i_flags;
+ rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
rt->rt6i_metric = 0;
memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
@@ -1309,8 +1324,7 @@ static void ip6_link_failure(struct sk_buff *skb)
if (rt) {
if (rt->rt6i_flags & RTF_CACHE) {
dst_hold(&rt->dst);
- if (ip6_del_rt(rt))
- dst_free(&rt->dst);
+ ip6_del_rt(rt);
} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
rt->rt6i_node->fn_sernum = -1;
}
@@ -1685,6 +1699,7 @@ out:
static int ip6_convert_metrics(struct mx6_config *mxc,
const struct fib6_config *cfg)
{
+ bool ecn_ca = false;
struct nlattr *nla;
int remaining;
u32 *mp;
@@ -1698,30 +1713,36 @@ static int ip6_convert_metrics(struct mx6_config *mxc,
nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
int type = nla_type(nla);
+ u32 val;
+
+ if (!type)
+ continue;
+ if (unlikely(type > RTAX_MAX))
+ goto err;
- if (type) {
- u32 val;
+ if (type == RTAX_CC_ALGO) {
+ char tmp[TCP_CA_NAME_MAX];
- if (unlikely(type > RTAX_MAX))
+ nla_strlcpy(tmp, nla, sizeof(tmp));
+ val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
+ if (val == TCP_CA_UNSPEC)
goto err;
- if (type == RTAX_CC_ALGO) {
- char tmp[TCP_CA_NAME_MAX];
+ } else {
+ val = nla_get_u32(nla);
+ }
+ if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
+ goto err;
- nla_strlcpy(tmp, nla, sizeof(tmp));
- val = tcp_ca_get_key_by_name(tmp);
- if (val == TCP_CA_UNSPEC)
- goto err;
- } else {
- val = nla_get_u32(nla);
- }
+ mp[type - 1] = val;
+ __set_bit(type - 1, mxc->mx_valid);
+ }
- mp[type - 1] = val;
- __set_bit(type - 1, mxc->mx_valid);
- }
+ if (ecn_ca) {
+ __set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
+ mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
}
mxc->mx = mp;
-
return 0;
err:
kfree(mp);
@@ -1801,6 +1822,25 @@ int ip6_route_info_create(struct fib6_config *cfg, struct rt6_info **rt_ret)
rt->dst.output = ip6_output;
+ if (cfg->fc_encap) {
+ struct lwtunnel_state *lwtstate;
+
+ err = lwtunnel_build_state(dev, cfg->fc_encap_type,
+ cfg->fc_encap, AF_INET6, cfg,
+ &lwtstate);
+ if (err)
+ goto out;
+ rt->dst.lwtstate = lwtstate_get(lwtstate);
+ if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
+ rt->dst.lwtstate->orig_output = rt->dst.output;
+ rt->dst.output = lwtunnel_output;
+ }
+ if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
+ rt->dst.lwtstate->orig_input = rt->dst.input;
+ rt->dst.input = lwtunnel_input;
+ }
+ }
+
ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
rt->rt6i_dst.plen = cfg->fc_dst_len;
if (rt->rt6i_dst.plen == 128)
@@ -1847,9 +1887,11 @@ int ip6_route_info_create(struct fib6_config *cfg, struct rt6_info **rt_ret)
rt->dst.input = ip6_pkt_prohibit;
break;
case RTN_THROW:
+ case RTN_UNREACHABLE:
default:
rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
- : -ENETUNREACH;
+ : (cfg->fc_type == RTN_UNREACHABLE)
+ ? -EHOSTUNREACH : -ENETUNREACH;
rt->dst.output = ip6_pkt_discard_out;
rt->dst.input = ip6_pkt_discard;
break;
@@ -1989,7 +2031,8 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
struct fib6_table *table;
struct net *net = dev_net(rt->dst.dev);
- if (rt == net->ipv6.ip6_null_entry) {
+ if (rt == net->ipv6.ip6_null_entry ||
+ rt->dst.flags & DST_NOCACHE) {
err = -ENOENT;
goto out;
}
@@ -2204,6 +2247,7 @@ static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
#endif
rt->rt6i_prefsrc = ort->rt6i_prefsrc;
rt->rt6i_table = ort->rt6i_table;
+ rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
}
#ifdef CONFIG_IPV6_ROUTE_INFO
@@ -2475,6 +2519,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
rt->rt6i_dst.addr = *addr;
rt->rt6i_dst.plen = 128;
rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
+ rt->dst.flags |= DST_NOCACHE;
atomic_set(&rt->dst.__refcnt, 1);
@@ -2653,6 +2698,8 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
[RTA_METRICS] = { .type = NLA_NESTED },
[RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
[RTA_PREF] = { .type = NLA_U8 },
+ [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
+ [RTA_ENCAP] = { .type = NLA_NESTED },
};
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -2747,6 +2794,12 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
cfg->fc_flags |= RTF_PREF(pref);
}
+ if (tb[RTA_ENCAP])
+ cfg->fc_encap = tb[RTA_ENCAP];
+
+ if (tb[RTA_ENCAP_TYPE])
+ cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
+
err = 0;
errout:
return err;
@@ -2838,6 +2891,10 @@ static int ip6_route_multipath_add(struct fib6_config *cfg)
r_cfg.fc_gateway = nla_get_in6_addr(nla);
r_cfg.fc_flags |= RTF_GATEWAY;
}
+ r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
+ nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
+ if (nla)
+ r_cfg.fc_encap_type = nla_get_u16(nla);
}
err = ip6_route_info_create(&r_cfg, &rt);
@@ -2891,8 +2948,7 @@ cleanup:
list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
if (nh->rt6_info)
dst_free(&nh->rt6_info->dst);
- if (nh->mxc.mx)
- kfree(nh->mxc.mx);
+ kfree(nh->mxc.mx);
list_del(&nh->next);
kfree(nh);
}
@@ -2967,7 +3023,7 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
return ip6_route_add(&cfg);
}
-static inline size_t rt6_nlmsg_size(void)
+static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
{
return NLMSG_ALIGN(sizeof(struct rtmsg))
+ nla_total_size(16) /* RTA_SRC */
@@ -2981,7 +3037,8 @@ static inline size_t rt6_nlmsg_size(void)
+ RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
+ nla_total_size(sizeof(struct rta_cacheinfo))
+ nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
- + nla_total_size(1); /* RTA_PREF */
+ + nla_total_size(1) /* RTA_PREF */
+ + lwtunnel_get_encap_size(rt->dst.lwtstate);
}
static int rt6_fill_node(struct net *net,
@@ -3042,6 +3099,11 @@ static int rt6_fill_node(struct net *net,
else
rtm->rtm_type = RTN_UNICAST;
rtm->rtm_flags = 0;
+ if (!netif_carrier_ok(rt->dst.dev)) {
+ rtm->rtm_flags |= RTNH_F_LINKDOWN;
+ if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
+ rtm->rtm_flags |= RTNH_F_DEAD;
+ }
rtm->rtm_scope = RT_SCOPE_UNIVERSE;
rtm->rtm_protocol = rt->rt6i_protocol;
if (rt->rt6i_flags & RTF_DYNAMIC)
@@ -3129,6 +3191,8 @@ static int rt6_fill_node(struct net *net,
if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
goto nla_put_failure;
+ lwtunnel_fill_encap(skb, rt->dst.lwtstate);
+
nlmsg_end(skb, nlh);
return 0;
@@ -3245,7 +3309,8 @@ errout:
return err;
}
-void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
+void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
+ unsigned int nlm_flags)
{
struct sk_buff *skb;
struct net *net = info->nl_net;
@@ -3255,12 +3320,12 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
err = -ENOBUFS;
seq = info->nlh ? info->nlh->nlmsg_seq : 0;
- skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
+ skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
if (!skb)
goto errout;
err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
- event, info->portid, seq, 0, 0, 0);
+ event, info->portid, seq, 0, 0, nlm_flags);
if (err < 0) {
/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
WARN_ON(err == -EMSGSIZE);