diff options
Diffstat (limited to 'net/ipv4/fib_semantics.c')
-rw-r--r-- | net/ipv4/fib_semantics.c | 260 |
1 files changed, 214 insertions, 46 deletions
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 3a06586b1..ef5892f5e 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -42,6 +42,7 @@ #include <net/ip_fib.h> #include <net/netlink.h> #include <net/nexthop.h> +#include <net/lwtunnel.h> #include "fib_lookup.h" @@ -208,6 +209,7 @@ static void free_fib_info_rcu(struct rcu_head *head) change_nexthops(fi) { if (nexthop_nh->nh_dev) dev_put(nexthop_nh->nh_dev); + lwtstate_put(nexthop_nh->nh_lwtstate); free_nh_exceptions(nexthop_nh); rt_fibinfo_free_cpus(nexthop_nh->nh_pcpu_rth_output); rt_fibinfo_free(&nexthop_nh->nh_rth_input); @@ -266,6 +268,7 @@ static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi) #ifdef CONFIG_IP_ROUTE_CLASSID nh->nh_tclassid != onh->nh_tclassid || #endif + lwtunnel_cmp_encap(nh->nh_lwtstate, onh->nh_lwtstate) || ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_COMPARE_MASK)) return -1; onh++; @@ -366,6 +369,7 @@ static inline size_t fib_nlmsg_size(struct fib_info *fi) payload += nla_total_size((RTAX_MAX * nla_total_size(4))); if (fi->fib_nhs) { + size_t nh_encapsize = 0; /* Also handles the special case fib_nhs == 1 */ /* each nexthop is packed in an attribute */ @@ -374,8 +378,21 @@ static inline size_t fib_nlmsg_size(struct fib_info *fi) /* may contain flow and gateway attribute */ nhsize += 2 * nla_total_size(4); + /* grab encap info */ + for_nexthops(fi) { + if (nh->nh_lwtstate) { + /* RTA_ENCAP_TYPE */ + nh_encapsize += lwtunnel_get_encap_size( + nh->nh_lwtstate); + /* RTA_ENCAP */ + nh_encapsize += nla_total_size(2); + } + } endfor_nexthops(fi); + /* all nexthops are packed in a nested attribute */ - payload += nla_total_size(fi->fib_nhs * nhsize); + payload += nla_total_size((fi->fib_nhs * nhsize) + + nh_encapsize); + } return payload; @@ -421,13 +438,15 @@ static int fib_detect_death(struct fib_info *fi, int order, if (n) { state = n->nud_state; neigh_release(n); + } else { + return 0; } if (state == NUD_REACHABLE) return 0; if ((state & NUD_VALID) && order != dflt) return 0; if ((state & NUD_VALID) || - (*last_idx < 0 && order > dflt)) { + (*last_idx < 0 && order > dflt && state != NUD_INCOMPLETE)) { *last_resort = fi; *last_idx = order; } @@ -452,6 +471,9 @@ static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining) static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, int remaining, struct fib_config *cfg) { + struct net *net = cfg->fc_nlinfo.nl_net; + int ret; + change_nexthops(fi) { int attrlen; @@ -475,18 +497,70 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, if (nexthop_nh->nh_tclassid) fi->fib_net->ipv4.fib_num_tclassid_users++; #endif + nla = nla_find(attrs, attrlen, RTA_ENCAP); + if (nla) { + struct lwtunnel_state *lwtstate; + struct net_device *dev = NULL; + struct nlattr *nla_entype; + + nla_entype = nla_find(attrs, attrlen, + RTA_ENCAP_TYPE); + if (!nla_entype) + goto err_inval; + if (cfg->fc_oif) + dev = __dev_get_by_index(net, cfg->fc_oif); + ret = lwtunnel_build_state(dev, nla_get_u16( + nla_entype), + nla, AF_INET, cfg, + &lwtstate); + if (ret) + goto errout; + nexthop_nh->nh_lwtstate = + lwtstate_get(lwtstate); + } } rtnh = rtnh_next(rtnh, &remaining); } endfor_nexthops(fi); return 0; + +err_inval: + ret = -EINVAL; + +errout: + return ret; } #endif +static int fib_encap_match(struct net *net, u16 encap_type, + struct nlattr *encap, + int oif, const struct fib_nh *nh, + const struct fib_config *cfg) +{ + struct lwtunnel_state *lwtstate; + struct net_device *dev = NULL; + int ret, result = 0; + + if (encap_type == LWTUNNEL_ENCAP_NONE) + return 0; + + if (oif) + dev = __dev_get_by_index(net, oif); + ret = lwtunnel_build_state(dev, encap_type, encap, + AF_INET, cfg, &lwtstate); + if (!ret) { + result = lwtunnel_cmp_encap(lwtstate, nh->nh_lwtstate); + lwtstate_free(lwtstate); + } + + return result; +} + int fib_nh_match(struct fib_config *cfg, struct fib_info *fi) { + struct net *net = cfg->fc_nlinfo.nl_net; #ifdef CONFIG_IP_ROUTE_MULTIPATH struct rtnexthop *rtnh; int remaining; @@ -496,6 +570,12 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi) return 1; if (cfg->fc_oif || cfg->fc_gw) { + if (cfg->fc_encap) { + if (fib_encap_match(net, cfg->fc_encap_type, + cfg->fc_encap, cfg->fc_oif, + fi->fib_nh, cfg)) + return 1; + } if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) && (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw)) return 0; @@ -585,7 +665,7 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi) static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, struct fib_nh *nh) { - int err; + int err = 0; struct net *net; struct net_device *dev; @@ -594,16 +674,18 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, struct fib_result res; if (nh->nh_flags & RTNH_F_ONLINK) { + unsigned int addr_type; if (cfg->fc_scope >= RT_SCOPE_LINK) return -EINVAL; - if (inet_addr_type(net, nh->nh_gw) != RTN_UNICAST) - return -EINVAL; dev = __dev_get_by_index(net, nh->nh_oif); if (!dev) return -ENODEV; if (!(dev->flags & IFF_UP)) return -ENETDOWN; + addr_type = inet_addr_type_dev_table(net, dev, nh->nh_gw); + if (addr_type != RTN_UNICAST) + return -EINVAL; if (!netif_carrier_ok(dev)) nh->nh_flags |= RTNH_F_LINKDOWN; nh->nh_dev = dev; @@ -613,6 +695,7 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, } rcu_read_lock(); { + struct fib_table *tbl = NULL; struct flowi4 fl4 = { .daddr = nh->nh_gw, .flowi4_scope = cfg->fc_scope + 1, @@ -623,8 +706,24 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, /* It is not necessary, but requires a bit of thinking */ if (fl4.flowi4_scope < RT_SCOPE_LINK) fl4.flowi4_scope = RT_SCOPE_LINK; - err = fib_lookup(net, &fl4, &res, - FIB_LOOKUP_IGNORE_LINKSTATE); + + if (cfg->fc_table) + tbl = fib_get_table(net, cfg->fc_table); + + if (tbl) + err = fib_table_lookup(tbl, &fl4, &res, + FIB_LOOKUP_IGNORE_LINKSTATE | + FIB_LOOKUP_NOREF); + + /* on error or if no table given do full lookup. This + * is needed for example when nexthops are in the local + * table rather than the given table + */ + if (!tbl || err) { + err = fib_lookup(net, &fl4, &res, + FIB_LOOKUP_IGNORE_LINKSTATE); + } + if (err) { rcu_read_unlock(); return err; @@ -760,6 +859,74 @@ __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh) return nh->nh_saddr; } +static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc) +{ + if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst || + fib_prefsrc != cfg->fc_dst) { + u32 tb_id = cfg->fc_table; + int rc; + + if (tb_id == RT_TABLE_MAIN) + tb_id = RT_TABLE_LOCAL; + + rc = inet_addr_type_table(cfg->fc_nlinfo.nl_net, + fib_prefsrc, tb_id); + + if (rc != RTN_LOCAL && tb_id != RT_TABLE_LOCAL) { + rc = inet_addr_type_table(cfg->fc_nlinfo.nl_net, + fib_prefsrc, RT_TABLE_LOCAL); + } + + if (rc != RTN_LOCAL) + return false; + } + return true; +} + +static int +fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg) +{ + bool ecn_ca = false; + struct nlattr *nla; + int remaining; + + if (!cfg->fc_mx) + return 0; + + nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { + int type = nla_type(nla); + u32 val; + + if (!type) + continue; + if (type > RTAX_MAX) + return -EINVAL; + + if (type == RTAX_CC_ALGO) { + char tmp[TCP_CA_NAME_MAX]; + + nla_strlcpy(tmp, nla, sizeof(tmp)); + val = tcp_ca_get_key_by_name(tmp, &ecn_ca); + if (val == TCP_CA_UNSPEC) + return -EINVAL; + } else { + val = nla_get_u32(nla); + } + if (type == RTAX_ADVMSS && val > 65535 - 40) + val = 65535 - 40; + if (type == RTAX_MTU && val > 65535 - 15) + val = 65535 - 15; + if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK)) + return -EINVAL; + fi->fib_metrics[type - 1] = val; + } + + if (ecn_ca) + fi->fib_metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA; + + return 0; +} + struct fib_info *fib_create_info(struct fib_config *cfg) { int err; @@ -832,36 +999,9 @@ struct fib_info *fib_create_info(struct fib_config *cfg) goto failure; } endfor_nexthops(fi) - if (cfg->fc_mx) { - struct nlattr *nla; - int remaining; - - nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { - int type = nla_type(nla); - - if (type) { - u32 val; - - if (type > RTAX_MAX) - goto err_inval; - if (type == RTAX_CC_ALGO) { - char tmp[TCP_CA_NAME_MAX]; - - nla_strlcpy(tmp, nla, sizeof(tmp)); - val = tcp_ca_get_key_by_name(tmp); - if (val == TCP_CA_UNSPEC) - goto err_inval; - } else { - val = nla_get_u32(nla); - } - if (type == RTAX_ADVMSS && val > 65535 - 40) - val = 65535 - 40; - if (type == RTAX_MTU && val > 65535 - 15) - val = 65535 - 15; - fi->fib_metrics[type - 1] = val; - } - } - } + err = fib_convert_metrics(fi, cfg); + if (err) + goto failure; if (cfg->fc_mp) { #ifdef CONFIG_IP_ROUTE_MULTIPATH @@ -882,6 +1022,22 @@ struct fib_info *fib_create_info(struct fib_config *cfg) } else { struct fib_nh *nh = fi->fib_nh; + if (cfg->fc_encap) { + struct lwtunnel_state *lwtstate; + struct net_device *dev = NULL; + + if (cfg->fc_encap_type == LWTUNNEL_ENCAP_NONE) + goto err_inval; + if (cfg->fc_oif) + dev = __dev_get_by_index(net, cfg->fc_oif); + err = lwtunnel_build_state(dev, cfg->fc_encap_type, + cfg->fc_encap, AF_INET, cfg, + &lwtstate); + if (err) + goto failure; + + nh->nh_lwtstate = lwtstate_get(lwtstate); + } nh->nh_oif = cfg->fc_oif; nh->nh_gw = cfg->fc_gw; nh->nh_flags = cfg->fc_flags; @@ -940,12 +1096,8 @@ struct fib_info *fib_create_info(struct fib_config *cfg) fi->fib_flags |= RTNH_F_LINKDOWN; } - if (fi->fib_prefsrc) { - if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst || - fi->fib_prefsrc != cfg->fc_dst) - if (inet_addr_type(net, fi->fib_prefsrc) != RTN_LOCAL) - goto err_inval; - } + if (fi->fib_prefsrc && !fib_valid_prefsrc(cfg, fi->fib_prefsrc)) + goto err_inval; change_nexthops(fi) { fib_info_update_nh_saddr(net, nexthop_nh); @@ -1055,6 +1207,8 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid)) goto nla_put_failure; #endif + if (fi->fib_nh->nh_lwtstate) + lwtunnel_fill_encap(skb, fi->fib_nh->nh_lwtstate); } #ifdef CONFIG_IP_ROUTE_MULTIPATH if (fi->fib_nhs > 1) { @@ -1090,6 +1244,8 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid)) goto nla_put_failure; #endif + if (nh->nh_lwtstate) + lwtunnel_fill_encap(skb, nh->nh_lwtstate); /* length of rtnetlink header + attributes */ rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh; } endfor_nexthops(fi); @@ -1132,7 +1288,13 @@ int fib_sync_down_addr(struct net *net, __be32 local) return ret; } -int fib_sync_down_dev(struct net_device *dev, unsigned long event) +/* Event force Flags Description + * NETDEV_CHANGE 0 LINKDOWN Carrier OFF, not for scope host + * NETDEV_DOWN 0 LINKDOWN|DEAD Link down, not for scope host + * NETDEV_DOWN 1 LINKDOWN|DEAD Last address removed + * NETDEV_UNREGISTER 1 LINKDOWN|DEAD Device removed + */ +int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force) { int ret = 0; int scope = RT_SCOPE_NOWHERE; @@ -1141,8 +1303,7 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event) struct hlist_head *head = &fib_info_devhash[hash]; struct fib_nh *nh; - if (event == NETDEV_UNREGISTER || - event == NETDEV_DOWN) + if (force) scope = -1; hlist_for_each_entry(nh, head, nh_hash) { @@ -1291,6 +1452,13 @@ int fib_sync_up(struct net_device *dev, unsigned int nh_flags) if (!(dev->flags & IFF_UP)) return 0; + if (nh_flags & RTNH_F_DEAD) { + unsigned int flags = dev_get_flags(dev); + + if (flags & (IFF_RUNNING | IFF_LOWER_UP)) + nh_flags |= RTNH_F_LINKDOWN; + } + prev_fi = NULL; hash = fib_devindex_hashfn(dev->ifindex); head = &fib_info_devhash[hash]; |