diff options
Diffstat (limited to 'include/net')
67 files changed, 1505 insertions, 507 deletions
diff --git a/include/net/6lowpan.h b/include/net/6lowpan.h index dc03d77ad..a2f59ec98 100644 --- a/include/net/6lowpan.h +++ b/include/net/6lowpan.h @@ -197,6 +197,27 @@ #define LOWPAN_NHC_UDP_CS_P_11 0xF3 /* source & dest = 0xF0B + 4bit inline */ #define LOWPAN_NHC_UDP_CS_C 0x04 /* checksum elided */ +#define LOWPAN_PRIV_SIZE(llpriv_size) \ + (sizeof(struct lowpan_priv) + llpriv_size) + +enum lowpan_lltypes { + LOWPAN_LLTYPE_BTLE, + LOWPAN_LLTYPE_IEEE802154, +}; + +struct lowpan_priv { + enum lowpan_lltypes lltype; + + /* must be last */ + u8 priv[0] __aligned(sizeof(void *)); +}; + +static inline +struct lowpan_priv *lowpan_priv(const struct net_device *dev) +{ + return netdev_priv(dev); +} + #ifdef DEBUG /* print data in line */ static inline void raw_dump_inline(const char *caller, char *msg, @@ -372,6 +393,8 @@ lowpan_uncompress_size(const struct sk_buff *skb, u16 *dgram_offset) return skb->len + uncomp_header - ret; } +void lowpan_netdev_setup(struct net_device *dev, enum lowpan_lltypes lltype); + int lowpan_header_decompress(struct sk_buff *skb, struct net_device *dev, const u8 *saddr, const u8 saddr_type, diff --git a/include/net/act_api.h b/include/net/act_api.h index 931738bc5..9d446f136 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -21,6 +21,8 @@ struct tcf_common { struct gnet_stats_rate_est64 tcfc_rate_est; spinlock_t tcfc_lock; struct rcu_head tcfc_rcu; + struct gnet_stats_basic_cpu __percpu *cpu_bstats; + struct gnet_stats_queue __percpu *cpu_qstats; }; #define tcf_head common.tcfc_head #define tcf_index common.tcfc_index @@ -68,6 +70,17 @@ static inline void tcf_hashinfo_destroy(struct tcf_hashinfo *hf) kfree(hf->htab); } +/* Update lastuse only if needed, to avoid dirtying a cache line. + * We use a temp variable to avoid fetching jiffies twice. + */ +static inline void tcf_lastuse_update(struct tcf_t *tm) +{ + unsigned long now = jiffies; + + if (tm->lastuse != now) + tm->lastuse = now; +} + #ifdef CONFIG_NET_CLS_ACT #define ACT_P_CREATED 1 @@ -98,11 +111,10 @@ struct tc_action_ops { }; int tcf_hash_search(struct tc_action *a, u32 index); -void tcf_hash_destroy(struct tc_action *a); u32 tcf_hash_new_index(struct tcf_hashinfo *hinfo); int tcf_hash_check(u32 index, struct tc_action *a, int bind); int tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a, - int size, int bind); + int size, int bind, bool cpustats); void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est); void tcf_hash_insert(struct tc_action *a); diff --git a/include/net/addrconf.h b/include/net/addrconf.h index def59d3a3..b5474b1fc 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -91,6 +91,37 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2); void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr); void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr); +static inline int addrconf_ifid_eui48(u8 *eui, struct net_device *dev) +{ + if (dev->addr_len != ETH_ALEN) + return -1; + memcpy(eui, dev->dev_addr, 3); + memcpy(eui + 5, dev->dev_addr + 3, 3); + + /* + * The zSeries OSA network cards can be shared among various + * OS instances, but the OSA cards have only one MAC address. + * This leads to duplicate address conflicts in conjunction + * with IPv6 if more than one instance uses the same card. + * + * The driver for these cards can deliver a unique 16-bit + * identifier for each instance sharing the same card. It is + * placed instead of 0xFFFE in the interface identifier. The + * "u" bit of the interface identifier is not inverted in this + * case. Hence the resulting interface identifier has local + * scope according to RFC2373. + */ + if (dev->dev_id) { + eui[3] = (dev->dev_id >> 8) & 0xFF; + eui[4] = dev->dev_id & 0xFF; + } else { + eui[3] = 0xFF; + eui[4] = 0xFE; + eui[0] ^= 2; + } + return 0; +} + static inline unsigned long addrconf_timeout_fixup(u32 timeout, unsigned int unit) { @@ -158,8 +189,8 @@ struct ipv6_stub { const struct in6_addr *addr); int (*ipv6_sock_mc_drop)(struct sock *sk, int ifindex, const struct in6_addr *addr); - int (*ipv6_dst_lookup)(struct sock *sk, struct dst_entry **dst, - struct flowi6 *fl6); + int (*ipv6_dst_lookup)(struct net *net, struct sock *sk, + struct dst_entry **dst, struct flowi6 *fl6); void (*udpv6_encap_enable)(void); void (*ndisc_send_na)(struct net_device *dev, struct neighbour *neigh, const struct in6_addr *daddr, diff --git a/include/net/af_unix.h b/include/net/af_unix.h index cb1b9bbda..b36d837c7 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -64,7 +64,7 @@ struct unix_sock { struct socket_wq peer_wq; }; -static inline struct unix_sock *unix_sk(struct sock *sk) +static inline struct unix_sock *unix_sk(const struct sock *sk) { return (struct unix_sock *)sk; } diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 3bd618d3e..544a0201a 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -469,6 +469,7 @@ struct hci_conn { struct delayed_work auto_accept_work; struct delayed_work idle_work; struct delayed_work le_conn_timeout; + struct work_struct le_scan_cleanup; struct device dev; struct dentry *debugfs; @@ -512,9 +513,11 @@ struct hci_conn_params { HCI_AUTO_CONN_DIRECT, HCI_AUTO_CONN_ALWAYS, HCI_AUTO_CONN_LINK_LOSS, + HCI_AUTO_CONN_EXPLICIT, } auto_connect; struct hci_conn *conn; + bool explicit_connect; }; extern struct list_head hci_dev_list; @@ -639,6 +642,7 @@ enum { HCI_CONN_DROP, HCI_CONN_PARAM_REMOVAL_PEND, HCI_CONN_NEW_LINK_KEY, + HCI_CONN_SCANNING, }; static inline bool hci_conn_ssp_enabled(struct hci_conn *conn) @@ -808,6 +812,26 @@ static inline struct hci_conn *hci_conn_hash_lookup_state(struct hci_dev *hdev, return NULL; } +static inline struct hci_conn *hci_lookup_le_connect(struct hci_dev *hdev) +{ + struct hci_conn_hash *h = &hdev->conn_hash; + struct hci_conn *c; + + rcu_read_lock(); + + list_for_each_entry_rcu(c, &h->list, list) { + if (c->type == LE_LINK && c->state == BT_CONNECT && + !test_bit(HCI_CONN_SCANNING, &c->flags)) { + rcu_read_unlock(); + return c; + } + } + + rcu_read_unlock(); + + return NULL; +} + int hci_disconnect(struct hci_conn *conn, __u8 reason); bool hci_setup_sync(struct hci_conn *conn, __u16 handle); void hci_sco_setup(struct hci_conn *conn, __u8 status); @@ -823,6 +847,9 @@ void hci_chan_del(struct hci_chan *chan); void hci_chan_list_flush(struct hci_conn *conn); struct hci_chan *hci_chan_lookup_handle(struct hci_dev *hdev, __u16 handle); +struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst, + u8 dst_type, u8 sec_level, + u16 conn_timeout, u8 role); struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, u8 dst_type, u8 sec_level, u16 conn_timeout, u8 role); @@ -988,6 +1015,9 @@ void hci_conn_params_clear_disabled(struct hci_dev *hdev); struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list, bdaddr_t *addr, u8 addr_type); +struct hci_conn_params *hci_explicit_connect_lookup(struct hci_dev *hdev, + bdaddr_t *addr, + u8 addr_type); void hci_uuids_clear(struct hci_dev *hdev); @@ -1297,7 +1327,7 @@ static inline int hci_check_conn_params(u16 min, u16 max, u16 latency, if (max >= to_multiplier * 8) return -EINVAL; - max_latency = (to_multiplier * 8 / max) - 1; + max_latency = (to_multiplier * 4 / max) - 1; if (latency > 499 || latency > max_latency) return -EINVAL; diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 2239a3753..c98afc08c 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -55,6 +55,8 @@ #define L2CAP_INFO_TIMEOUT msecs_to_jiffies(4000) #define L2CAP_MOVE_TIMEOUT msecs_to_jiffies(4000) #define L2CAP_MOVE_ERTX_TIMEOUT msecs_to_jiffies(60000) +#define L2CAP_WAIT_ACK_POLL_PERIOD msecs_to_jiffies(200) +#define L2CAP_WAIT_ACK_TIMEOUT msecs_to_jiffies(10000) #define L2CAP_A2MP_DEFAULT_MTU 670 diff --git a/include/net/bond_options.h b/include/net/bond_options.h index c28aca253..1797235cd 100644 --- a/include/net/bond_options.h +++ b/include/net/bond_options.h @@ -66,6 +66,7 @@ enum { BOND_OPT_AD_ACTOR_SYS_PRIO, BOND_OPT_AD_ACTOR_SYSTEM, BOND_OPT_AD_USER_PORT_KEY, + BOND_OPT_NUM_PEER_NOTIF_ALIAS, BOND_OPT_LAST }; diff --git a/include/net/bonding.h b/include/net/bonding.h index 20defc035..c1740a279 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -310,6 +310,13 @@ static inline bool bond_uses_primary(struct bonding *bond) return bond_mode_uses_primary(BOND_MODE(bond)); } +static inline struct net_device *bond_option_active_slave_get_rcu(struct bonding *bond) +{ + struct slave *slave = rcu_dereference(bond->curr_active_slave); + + return bond_uses_primary(bond) && slave ? slave->dev : NULL; +} + static inline bool bond_slave_is_up(struct slave *slave) { return netif_running(slave->dev) && netif_carrier_ok(slave->dev); diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 883fe1e7c..f0889a247 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2369,8 +2369,7 @@ struct cfg80211_qos_map { * method returns 0.) * * @mgmt_frame_register: Notify driver that a management frame type was - * registered. Note that this callback may not sleep, and cannot run - * concurrently with itself. + * registered. The callback is allowed to sleep. * * @set_antenna: Set antenna configuration (tx_ant, rx_ant) on the device. * Parameters are bitmaps of allowed antennas to use for TX/RX. Drivers may diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index 290a9a69a..76b1ffaea 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -34,6 +34,8 @@ struct cfg802154_ops { int type); void (*del_virtual_intf_deprecated)(struct wpan_phy *wpan_phy, struct net_device *dev); + int (*suspend)(struct wpan_phy *wpan_phy); + int (*resume)(struct wpan_phy *wpan_phy); int (*add_virtual_intf)(struct wpan_phy *wpan_phy, const char *name, unsigned char name_assign_type, @@ -61,6 +63,8 @@ struct cfg802154_ops { s8 max_frame_retries); int (*set_lbt_mode)(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, bool mode); + int (*set_ackreq_default)(struct wpan_phy *wpan_phy, + struct wpan_dev *wpan_dev, bool ackreq); }; static inline bool @@ -171,6 +175,9 @@ struct wpan_dev { struct list_head list; struct net_device *netdev; + /* lowpan interface, set when the wpan_dev belongs to one lowpan_dev */ + struct net_device *lowpan_dev; + u32 identifier; /* MAC PIB */ @@ -191,6 +198,9 @@ struct wpan_dev { bool lbt; bool promiscuous_mode; + + /* fallback for acknowledgment bit setting */ + bool ackreq; }; #define to_phy(_dev) container_of(_dev, struct wpan_phy, dev) diff --git a/include/net/checksum.h b/include/net/checksum.h index 2d1d73cb7..9fcaedf99 100644 --- a/include/net/checksum.h +++ b/include/net/checksum.h @@ -140,14 +140,16 @@ static inline void csum_replace2(__sum16 *sum, __be16 old, __be16 new) struct sk_buff; void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb, - __be32 from, __be32 to, int pseudohdr); + __be32 from, __be32 to, bool pseudohdr); void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb, const __be32 *from, const __be32 *to, - int pseudohdr); + bool pseudohdr); +void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb, + __wsum diff, bool pseudohdr); static inline void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb, __be16 from, __be16 to, - int pseudohdr) + bool pseudohdr) { inet_proto_csum_replace4(sum, skb, (__force __be32)from, (__force __be32)to, pseudohdr); diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h index c15d39456..ccd6d8bff 100644 --- a/include/net/cls_cgroup.h +++ b/include/net/cls_cgroup.h @@ -49,9 +49,38 @@ static inline void sock_update_classid(struct sock *sk) if (classid != sk->sk_classid) sk->sk_classid = classid; } + +static inline u32 task_get_classid(const struct sk_buff *skb) +{ + u32 classid = task_cls_state(current)->classid; + + /* Due to the nature of the classifier it is required to ignore all + * packets originating from softirq context as accessing `current' + * would lead to false results. + * + * This test assumes that all callers of dev_queue_xmit() explicitly + * disable bh. Knowing this, it is possible to detect softirq based + * calls by looking at the number of nested bh disable calls because + * softirqs always disables bh. + */ + if (in_serving_softirq()) { + /* If there is an sk_classid we'll use that. */ + if (!skb->sk) + return 0; + + classid = skb->sk->sk_classid; + } + + return classid; +} #else /* !CONFIG_CGROUP_NET_CLASSID */ static inline void sock_update_classid(struct sock *sk) { } + +static inline u32 task_get_classid(const struct sk_buff *skb) +{ + return 0; +} #endif /* CONFIG_CGROUP_NET_CLASSID */ #endif /* _NET_CLS_CGROUP_H */ diff --git a/include/net/dsa.h b/include/net/dsa.h index fbca63ba8..b34d812bc 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -171,6 +171,11 @@ static inline bool dsa_is_cpu_port(struct dsa_switch *ds, int p) return !!(ds->index == ds->dst->cpu_switch && p == ds->dst->cpu_port); } +static inline bool dsa_is_dsa_port(struct dsa_switch *ds, int p) +{ + return !!((ds->dsa_port_mask) & (1 << p)); +} + static inline bool dsa_is_port_initialized(struct dsa_switch *ds, int p) { return ds->phys_port_mask & (1 << p) && ds->ports[p]; @@ -296,12 +301,28 @@ struct dsa_switch_driver { u32 br_port_mask); int (*port_stp_update)(struct dsa_switch *ds, int port, u8 state); - int (*fdb_add)(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid); - int (*fdb_del)(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid); - int (*fdb_getnext)(struct dsa_switch *ds, int port, - unsigned char *addr, bool *is_static); + + /* + * VLAN support + */ + int (*port_pvid_get)(struct dsa_switch *ds, int port, u16 *pvid); + int (*port_pvid_set)(struct dsa_switch *ds, int port, u16 pvid); + int (*port_vlan_add)(struct dsa_switch *ds, int port, u16 vid, + bool untagged); + int (*port_vlan_del)(struct dsa_switch *ds, int port, u16 vid); + int (*vlan_getnext)(struct dsa_switch *ds, u16 *vid, + unsigned long *ports, unsigned long *untagged); + + /* + * Forwarding database + */ + int (*port_fdb_add)(struct dsa_switch *ds, int port, + const unsigned char *addr, u16 vid); + int (*port_fdb_del)(struct dsa_switch *ds, int port, + const unsigned char *addr, u16 vid); + int (*port_fdb_getnext)(struct dsa_switch *ds, int port, + unsigned char *addr, u16 *vid, + bool *is_static); }; void register_switch_driver(struct dsa_switch_driver *type); diff --git a/include/net/dst.h b/include/net/dst.h index 2bc73f8a0..9261d9283 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -57,6 +57,7 @@ struct dst_entry { #define DST_FAKE_RTABLE 0x0040 #define DST_XFRM_TUNNEL 0x0080 #define DST_XFRM_QUEUE 0x0100 +#define DST_METADATA 0x0200 unsigned short pending_confirm; @@ -83,12 +84,13 @@ struct dst_entry { __u32 __pad2; #endif +#ifdef CONFIG_64BIT + struct lwtunnel_state *lwtstate; /* * Align __refcnt to a 64 bytes alignment * (L1_CACHE_SIZE would be too much) */ -#ifdef CONFIG_64BIT - long __pad_to_align_refcnt[2]; + long __pad_to_align_refcnt[1]; #endif /* * __refcnt wants to be on a different cache line from @@ -97,6 +99,9 @@ struct dst_entry { atomic_t __refcnt; /* client references */ int __use; unsigned long lastuse; +#ifndef CONFIG_64BIT + struct lwtunnel_state *lwtstate; +#endif union { struct dst_entry *next; struct rtable __rcu *rt_next; @@ -202,6 +207,12 @@ static inline void dst_metric_set(struct dst_entry *dst, int metric, u32 val) p[metric-1] = val; } +/* Kernel-internal feature bits that are unallocated in user space. */ +#define DST_FEATURE_ECN_CA (1 << 31) + +#define DST_FEATURE_MASK (DST_FEATURE_ECN_CA) +#define DST_FEATURE_ECN_MASK (DST_FEATURE_ECN_CA | RTAX_FEATURE_ECN) + static inline u32 dst_feature(const struct dst_entry *dst, u32 feature) { @@ -284,13 +295,18 @@ static inline void skb_dst_drop(struct sk_buff *skb) } } -static inline void skb_dst_copy(struct sk_buff *nskb, const struct sk_buff *oskb) +static inline void __skb_dst_copy(struct sk_buff *nskb, unsigned long refdst) { - nskb->_skb_refdst = oskb->_skb_refdst; + nskb->_skb_refdst = refdst; if (!(nskb->_skb_refdst & SKB_DST_NOREF)) dst_clone(skb_dst(nskb)); } +static inline void skb_dst_copy(struct sk_buff *nskb, const struct sk_buff *oskb) +{ + __skb_dst_copy(nskb, oskb->_skb_refdst); +} + /** * skb_dst_force - makes sure skb dst is refcounted * @skb: buffer @@ -356,6 +372,9 @@ static inline int dst_discard(struct sk_buff *skb) } void *dst_alloc(struct dst_ops *ops, struct net_device *dev, int initial_ref, int initial_obsolete, unsigned short flags); +void dst_init(struct dst_entry *dst, struct dst_ops *ops, + struct net_device *dev, int initial_ref, int initial_obsolete, + unsigned short flags); void __dst_free(struct dst_entry *dst); struct dst_entry *dst_destroy(struct dst_entry *dst); @@ -457,7 +476,7 @@ static inline struct dst_entry *dst_check(struct dst_entry *dst, u32 cookie) return dst; } -void dst_init(void); +void dst_subsys_init(void); /* Flags for xfrm_lookup flags argument. */ enum { diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h new file mode 100644 index 000000000..6816f0fa5 --- /dev/null +++ b/include/net/dst_metadata.h @@ -0,0 +1,141 @@ +#ifndef __NET_DST_METADATA_H +#define __NET_DST_METADATA_H 1 + +#include <linux/skbuff.h> +#include <net/ip_tunnels.h> +#include <net/dst.h> + +struct metadata_dst { + struct dst_entry dst; + union { + struct ip_tunnel_info tun_info; + } u; +}; + +static inline struct metadata_dst *skb_metadata_dst(struct sk_buff *skb) +{ + struct metadata_dst *md_dst = (struct metadata_dst *) skb_dst(skb); + + if (md_dst && md_dst->dst.flags & DST_METADATA) + return md_dst; + + return NULL; +} + +static inline struct ip_tunnel_info *skb_tunnel_info(struct sk_buff *skb) +{ + struct metadata_dst *md_dst = skb_metadata_dst(skb); + struct dst_entry *dst; + + if (md_dst) + return &md_dst->u.tun_info; + + dst = skb_dst(skb); + if (dst && dst->lwtstate) + return lwt_tun_info(dst->lwtstate); + + return NULL; +} + +static inline bool skb_valid_dst(const struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + + return dst && !(dst->flags & DST_METADATA); +} + +struct metadata_dst *metadata_dst_alloc(u8 optslen, gfp_t flags); +struct metadata_dst __percpu *metadata_dst_alloc_percpu(u8 optslen, gfp_t flags); + +static inline struct metadata_dst *tun_rx_dst(int md_size) +{ + struct metadata_dst *tun_dst; + + tun_dst = metadata_dst_alloc(md_size, GFP_ATOMIC); + if (!tun_dst) + return NULL; + + tun_dst->u.tun_info.options_len = 0; + tun_dst->u.tun_info.mode = 0; + return tun_dst; +} + +static inline struct metadata_dst *tun_dst_unclone(struct sk_buff *skb) +{ + struct metadata_dst *md_dst = skb_metadata_dst(skb); + int md_size; + struct metadata_dst *new_md; + + if (!md_dst) + return ERR_PTR(-EINVAL); + + md_size = md_dst->u.tun_info.options_len; + new_md = metadata_dst_alloc(md_size, GFP_ATOMIC); + if (!new_md) + return ERR_PTR(-ENOMEM); + + memcpy(&new_md->u.tun_info, &md_dst->u.tun_info, + sizeof(struct ip_tunnel_info) + md_size); + skb_dst_drop(skb); + dst_hold(&new_md->dst); + skb_dst_set(skb, &new_md->dst); + return new_md; +} + +static inline struct ip_tunnel_info *skb_tunnel_info_unclone(struct sk_buff *skb) +{ + struct metadata_dst *dst; + + dst = tun_dst_unclone(skb); + if (IS_ERR(dst)) + return NULL; + + return &dst->u.tun_info; +} + +static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb, + __be16 flags, + __be64 tunnel_id, + int md_size) +{ + const struct iphdr *iph = ip_hdr(skb); + struct metadata_dst *tun_dst; + + tun_dst = tun_rx_dst(md_size); + if (!tun_dst) + return NULL; + + ip_tunnel_key_init(&tun_dst->u.tun_info.key, + iph->saddr, iph->daddr, iph->tos, iph->ttl, + 0, 0, tunnel_id, flags); + return tun_dst; +} + +static inline struct metadata_dst *ipv6_tun_rx_dst(struct sk_buff *skb, + __be16 flags, + __be64 tunnel_id, + int md_size) +{ + const struct ipv6hdr *ip6h = ipv6_hdr(skb); + struct metadata_dst *tun_dst; + struct ip_tunnel_info *info; + + tun_dst = tun_rx_dst(md_size); + if (!tun_dst) + return NULL; + + info = &tun_dst->u.tun_info; + info->mode = IP_TUNNEL_INFO_IPV6; + info->key.tun_flags = flags; + info->key.tun_id = tunnel_id; + info->key.tp_src = 0; + info->key.tp_dst = 0; + + info->key.u.ipv6.src = ip6h->saddr; + info->key.u.ipv6.dst = ip6h->daddr; + info->key.tos = ipv6_get_dsfield(ip6h); + info->key.ttl = ip6h->hop_limit; + return tun_dst; +} + +#endif /* __NET_DST_METADATA_H */ diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 903a55efb..59160de70 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -19,6 +19,7 @@ struct fib_rule { u8 action; /* 3 bytes hole, try to use */ u32 target; + __be64 tun_id; struct fib_rule __rcu *ctarget; struct net *fr_net; @@ -65,7 +66,6 @@ struct fib_rules_ops { struct nlattr **); int (*fill)(struct fib_rule *, struct sk_buff *, struct fib_rule_hdr *); - u32 (*default_pref)(struct fib_rules_ops *ops); size_t (*nlmsg_payload)(struct fib_rule *); /* Called after modifications to the rules set, must flush @@ -117,5 +117,4 @@ int fib_rules_lookup(struct fib_rules_ops *, struct flowi *, int flags, struct fib_lookup_arg *); int fib_default_rule_add(struct fib_rules_ops *, u32 pref, u32 table, u32 flags); -u32 fib_default_rule_pref(struct fib_rules_ops *ops); #endif diff --git a/include/net/flow.h b/include/net/flow.h index 8109a159d..9b85db85f 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -10,6 +10,7 @@ #include <linux/socket.h> #include <linux/in6.h> #include <linux/atomic.h> +#include <net/flow_dissector.h> /* * ifindex generation is per-net namespace, and loopback is @@ -19,6 +20,10 @@ #define LOOPBACK_IFINDEX 1 +struct flowi_tunnel { + __be64 tun_id; +}; + struct flowi_common { int flowic_oif; int flowic_iif; @@ -29,7 +34,10 @@ struct flowi_common { __u8 flowic_flags; #define FLOWI_FLAG_ANYSRC 0x01 #define FLOWI_FLAG_KNOWN_NH 0x02 +#define FLOWI_FLAG_VRFSRC 0x04 +#define FLOWI_FLAG_SKIP_NH_OIF 0x08 __u32 flowic_secid; + struct flowi_tunnel flowic_tun_key; }; union flowi_uli { @@ -66,6 +74,7 @@ struct flowi4 { #define flowi4_proto __fl_common.flowic_proto #define flowi4_flags __fl_common.flowic_flags #define flowi4_secid __fl_common.flowic_secid +#define flowi4_tun_key __fl_common.flowic_tun_key /* (saddr,daddr) must be grouped, same order as in IP header */ __be32 saddr; @@ -95,6 +104,7 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif, fl4->flowi4_proto = proto; fl4->flowi4_flags = flags; fl4->flowi4_secid = 0; + fl4->flowi4_tun_key.tun_id = 0; fl4->daddr = daddr; fl4->saddr = saddr; fl4->fl4_dport = dport; @@ -122,6 +132,7 @@ struct flowi6 { #define flowi6_proto __fl_common.flowic_proto #define flowi6_flags __fl_common.flowic_flags #define flowi6_secid __fl_common.flowic_secid +#define flowi6_tun_key __fl_common.flowic_tun_key struct in6_addr daddr; struct in6_addr saddr; __be32 flowlabel; @@ -165,6 +176,7 @@ struct flowi { #define flowi_proto u.__fl_common.flowic_proto #define flowi_flags u.__fl_common.flowic_flags #define flowi_secid u.__fl_common.flowic_secid +#define flowi_tun_key u.__fl_common.flowic_tun_key } __attribute__((__aligned__(BITS_PER_LONG/8))); static inline struct flowi *flowi4_to_flowi(struct flowi4 *fl4) @@ -233,4 +245,22 @@ void flow_cache_flush(struct net *net); void flow_cache_flush_deferred(struct net *net); extern atomic_t flow_cache_genid; +__u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys); + +static inline __u32 get_hash_from_flowi6(const struct flowi6 *fl6) +{ + struct flow_keys keys; + + return __get_hash_from_flowi6(fl6, &keys); +} + +__u32 __get_hash_from_flowi4(const struct flowi4 *fl4, struct flow_keys *keys); + +static inline __u32 get_hash_from_flowi4(const struct flowi4 *fl4) +{ + struct flow_keys keys; + + return __get_hash_from_flowi4(fl4, &keys); +} + #endif diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 1a8c22419..8c8548cf5 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -2,7 +2,6 @@ #define _NET_FLOW_DISSECTOR_H #include <linux/types.h> -#include <linux/skbuff.h> #include <linux/in6.h> #include <uapi/linux/if_ether.h> @@ -13,8 +12,13 @@ struct flow_dissector_key_control { u16 thoff; u16 addr_type; + u32 flags; }; +#define FLOW_DIS_IS_FRAGMENT BIT(0) +#define FLOW_DIS_FIRST_FRAG BIT(1) +#define FLOW_DIS_ENCAPSULATION BIT(2) + /** * struct flow_dissector_key_basic: * @thoff: Transport header offset @@ -123,6 +127,11 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_MAX, }; +#define FLOW_DISSECTOR_F_PARSE_1ST_FRAG BIT(0) +#define FLOW_DISSECTOR_F_STOP_AT_L3 BIT(1) +#define FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL BIT(2) +#define FLOW_DISSECTOR_F_STOP_AT_ENCAP BIT(3) + struct flow_dissector_key { enum flow_dissector_key_id key_id; size_t offset; /* offset of struct flow_dissector_key_* @@ -134,23 +143,6 @@ struct flow_dissector { unsigned short int offset[FLOW_DISSECTOR_KEY_MAX]; }; -void skb_flow_dissector_init(struct flow_dissector *flow_dissector, - const struct flow_dissector_key *key, - unsigned int key_count); - -bool __skb_flow_dissect(const struct sk_buff *skb, - struct flow_dissector *flow_dissector, - void *target_container, - void *data, __be16 proto, int nhoff, int hlen); - -static inline bool skb_flow_dissect(const struct sk_buff *skb, - struct flow_dissector *flow_dissector, - void *target_container) -{ - return __skb_flow_dissect(skb, flow_dissector, target_container, - NULL, 0, 0, 0); -} - struct flow_keys { struct flow_dissector_key_control control; #define FLOW_KEYS_HASH_START_FIELD basic @@ -170,38 +162,6 @@ __be32 flow_get_u32_dst(const struct flow_keys *flow); extern struct flow_dissector flow_keys_dissector; extern struct flow_dissector flow_keys_buf_dissector; -static inline bool skb_flow_dissect_flow_keys(const struct sk_buff *skb, - struct flow_keys *flow) -{ - memset(flow, 0, sizeof(*flow)); - return __skb_flow_dissect(skb, &flow_keys_dissector, flow, - NULL, 0, 0, 0); -} - -static inline bool skb_flow_dissect_flow_keys_buf(struct flow_keys *flow, - void *data, __be16 proto, - int nhoff, int hlen) -{ - memset(flow, 0, sizeof(*flow)); - return __skb_flow_dissect(NULL, &flow_keys_buf_dissector, flow, - data, proto, nhoff, hlen); -} - -__be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto, - void *data, int hlen_proto); - -static inline __be32 skb_flow_get_ports(const struct sk_buff *skb, - int thoff, u8 ip_proto) -{ - return __skb_flow_get_ports(skb, thoff, ip_proto, NULL, 0); -} - -u32 flow_hash_from_keys(struct flow_keys *keys); -void __skb_get_hash(struct sk_buff *skb); -u32 skb_get_poff(const struct sk_buff *skb); -u32 __skb_get_poff(const struct sk_buff *skb, void *data, - const struct flow_keys *keys, int hlen); - /* struct flow_keys_digest: * * This structure is used to hold a digest of the full flow keys. This is a @@ -217,4 +177,11 @@ struct flow_keys_digest { void make_flow_keys_digest(struct flow_keys_digest *digest, const struct flow_keys *flow); +static inline bool flow_keys_have_l4(struct flow_keys *keys) +{ + return (keys->ports.ports || keys->tags.flow_label); +} + +u32 flow_hash_from_keys(struct flow_keys *keys); + #endif diff --git a/include/net/geneve.h b/include/net/geneve.h index 2a0543a18..3106ed6ea 100644 --- a/include/net/geneve.h +++ b/include/net/geneve.h @@ -62,40 +62,9 @@ struct genevehdr { struct geneve_opt options[]; }; -static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb) -{ - return (struct genevehdr *)(udp_hdr(skb) + 1); -} - #ifdef CONFIG_INET -struct geneve_sock; - -typedef void (geneve_rcv_t)(struct geneve_sock *gs, struct sk_buff *skb); - -struct geneve_sock { - struct list_head list; - geneve_rcv_t *rcv; - void *rcv_data; - struct socket *sock; - struct rcu_head rcu; - int refcnt; - struct udp_offload udp_offloads; -}; - -#define GENEVE_VER 0 -#define GENEVE_BASE_HLEN (sizeof(struct udphdr) + sizeof(struct genevehdr)) - -struct geneve_sock *geneve_sock_add(struct net *net, __be16 port, - geneve_rcv_t *rcv, void *data, - bool no_share, bool ipv6); - -void geneve_sock_release(struct geneve_sock *vs); - -int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt, - struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, - __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port, - __be16 tun_flags, u8 vni[3], u8 opt_len, u8 *opt, - bool csum, bool xnet); +struct net_device *geneve_dev_create_fb(struct net *net, const char *name, + u8 name_assign_type, u16 dst_port); #endif /*ifdef CONFIG_INET */ #endif /*ifdef__NET_GENEVE_H */ diff --git a/include/net/gre.h b/include/net/gre.h index b53182018..97eafdc47 100644 --- a/include/net/gre.h +++ b/include/net/gre.h @@ -4,6 +4,12 @@ #include <linux/skbuff.h> #include <net/ip_tunnels.h> +struct gre_base_hdr { + __be16 flags; + __be16 protocol; +}; +#define GRE_HEADER_SECTION 4 + #define GREPROTO_CISCO 0 #define GREPROTO_PPTP 1 #define GREPROTO_MAX 2 @@ -14,91 +20,9 @@ struct gre_protocol { void (*err_handler)(struct sk_buff *skb, u32 info); }; -struct gre_base_hdr { - __be16 flags; - __be16 protocol; -}; -#define GRE_HEADER_SECTION 4 - int gre_add_protocol(const struct gre_protocol *proto, u8 version); int gre_del_protocol(const struct gre_protocol *proto, u8 version); -struct gre_cisco_protocol { - int (*handler)(struct sk_buff *skb, const struct tnl_ptk_info *tpi); - int (*err_handler)(struct sk_buff *skb, u32 info, - const struct tnl_ptk_info *tpi); - u8 priority; -}; - -int gre_cisco_register(struct gre_cisco_protocol *proto); -int gre_cisco_unregister(struct gre_cisco_protocol *proto); - -void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi, - int hdr_len); - -static inline struct sk_buff *gre_handle_offloads(struct sk_buff *skb, - bool csum) -{ - return iptunnel_handle_offloads(skb, csum, - csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE); -} - - -static inline int ip_gre_calc_hlen(__be16 o_flags) -{ - int addend = 4; - - if (o_flags&TUNNEL_CSUM) - addend += 4; - if (o_flags&TUNNEL_KEY) - addend += 4; - if (o_flags&TUNNEL_SEQ) - addend += 4; - return addend; -} - -static inline __be16 gre_flags_to_tnl_flags(__be16 flags) -{ - __be16 tflags = 0; - - if (flags & GRE_CSUM) - tflags |= TUNNEL_CSUM; - if (flags & GRE_ROUTING) - tflags |= TUNNEL_ROUTING; - if (flags & GRE_KEY) - tflags |= TUNNEL_KEY; - if (flags & GRE_SEQ) - tflags |= TUNNEL_SEQ; - if (flags & GRE_STRICT) - tflags |= TUNNEL_STRICT; - if (flags & GRE_REC) - tflags |= TUNNEL_REC; - if (flags & GRE_VERSION) - tflags |= TUNNEL_VERSION; - - return tflags; -} - -static inline __be16 tnl_flags_to_gre_flags(__be16 tflags) -{ - __be16 flags = 0; - - if (tflags & TUNNEL_CSUM) - flags |= GRE_CSUM; - if (tflags & TUNNEL_ROUTING) - flags |= GRE_ROUTING; - if (tflags & TUNNEL_KEY) - flags |= GRE_KEY; - if (tflags & TUNNEL_SEQ) - flags |= GRE_SEQ; - if (tflags & TUNNEL_STRICT) - flags |= GRE_STRICT; - if (tflags & TUNNEL_REC) - flags |= GRE_REC; - if (tflags & TUNNEL_VERSION) - flags |= GRE_VERSION; - - return flags; -} - +struct net_device *gretap_fb_dev_create(struct net *net, const char *name, + u8 name_assign_type); #endif diff --git a/include/net/gro_cells.h b/include/net/gro_cells.h index 0f712c0bc..cf6c74550 100644 --- a/include/net/gro_cells.h +++ b/include/net/gro_cells.h @@ -32,37 +32,28 @@ static inline void gro_cells_receive(struct gro_cells *gcells, struct sk_buff *s return; } - /* We run in BH context */ - spin_lock(&cell->napi_skbs.lock); - __skb_queue_tail(&cell->napi_skbs, skb); if (skb_queue_len(&cell->napi_skbs) == 1) napi_schedule(&cell->napi); - - spin_unlock(&cell->napi_skbs.lock); } -/* called unser BH context */ +/* called under BH context */ static inline int gro_cell_poll(struct napi_struct *napi, int budget) { struct gro_cell *cell = container_of(napi, struct gro_cell, napi); struct sk_buff *skb; int work_done = 0; - spin_lock(&cell->napi_skbs.lock); while (work_done < budget) { skb = __skb_dequeue(&cell->napi_skbs); if (!skb) break; - spin_unlock(&cell->napi_skbs.lock); napi_gro_receive(napi, skb); work_done++; - spin_lock(&cell->napi_skbs.lock); } if (work_done < budget) - napi_complete(napi); - spin_unlock(&cell->napi_skbs.lock); + napi_complete_done(napi, work_done); return work_done; } @@ -77,7 +68,7 @@ static inline int gro_cells_init(struct gro_cells *gcells, struct net_device *de for_each_possible_cpu(i) { struct gro_cell *cell = per_cpu_ptr(gcells->cells, i); - skb_queue_head_init(&cell->napi_skbs); + __skb_queue_head_init(&cell->napi_skbs); netif_napi_add(dev, &cell->napi, gro_cell_poll, 64); napi_enable(&cell->napi); } @@ -92,8 +83,9 @@ static inline void gro_cells_destroy(struct gro_cells *gcells) return; for_each_possible_cpu(i) { struct gro_cell *cell = per_cpu_ptr(gcells->cells, i); + netif_napi_del(&cell->napi); - skb_queue_purge(&cell->napi_skbs); + __skb_queue_purge(&cell->napi_skbs); } free_percpu(gcells->cells); gcells->cells = NULL; diff --git a/include/net/inet_common.h b/include/net/inet_common.h index 279f83591..109e3ee91 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -41,7 +41,8 @@ int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, static inline void inet_ctl_sock_destroy(struct sock *sk) { - sock_release(sk->sk_socket); + if (sk) + sock_release(sk->sk_socket); } #endif diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index b73c88a19..b07d12669 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -205,8 +205,8 @@ void inet_put_port(struct sock *sk); void inet_hashinfo_init(struct inet_hashinfo *h); -int __inet_hash_nolisten(struct sock *sk, struct inet_timewait_sock *tw); -int __inet_hash(struct sock *sk, struct inet_timewait_sock *tw); +void __inet_hash_nolisten(struct sock *sk, struct sock *osk); +void __inet_hash(struct sock *sk, struct sock *osk); void inet_hash(struct sock *sk); void inet_unhash(struct sock *sk); diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 360c48022..fc1937698 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -100,10 +100,8 @@ static inline struct inet_timewait_sock *inet_twsk(const struct sock *sk) void inet_twsk_free(struct inet_timewait_sock *tw); void inet_twsk_put(struct inet_timewait_sock *tw); -int inet_twsk_unhash(struct inet_timewait_sock *tw); - -int inet_twsk_bind_unhash(struct inet_timewait_sock *tw, - struct inet_hashinfo *hashinfo); +void inet_twsk_bind_unhash(struct inet_timewait_sock *tw, + struct inet_hashinfo *hashinfo); struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, struct inet_timewait_death_row *dr, @@ -112,8 +110,20 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, struct inet_hashinfo *hashinfo); -void inet_twsk_schedule(struct inet_timewait_sock *tw, const int timeo); -void inet_twsk_deschedule(struct inet_timewait_sock *tw); +void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, + bool rearm); + +static inline void inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo) +{ + __inet_twsk_schedule(tw, timeo, false); +} + +static inline void inet_twsk_reschedule(struct inet_timewait_sock *tw, int timeo) +{ + __inet_twsk_schedule(tw, timeo, true); +} + +void inet_twsk_deschedule_put(struct inet_timewait_sock *tw); void inet_twsk_purge(struct inet_hashinfo *hashinfo, struct inet_timewait_death_row *twdr, int family); diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index d5332ddce..4a6009d44 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -15,16 +15,20 @@ #include <net/ipv6.h> #include <linux/atomic.h> -struct inetpeer_addr_base { - union { - __be32 a4; - __be32 a6[4]; - struct in6_addr in6; - }; +/* IPv4 address key for cache lookups */ +struct ipv4_addr_key { + __be32 addr; + int vif; }; +#define INETPEER_MAXKEYSZ (sizeof(struct in6_addr) / sizeof(u32)) + struct inetpeer_addr { - struct inetpeer_addr_base addr; + union { + struct ipv4_addr_key a4; + struct in6_addr a6; + u32 key[INETPEER_MAXKEYSZ]; + }; __u16 family; }; @@ -65,69 +69,33 @@ struct inet_peer_base { int total; }; -#define INETPEER_BASE_BIT 0x1UL - -static inline struct inet_peer *inetpeer_ptr(unsigned long val) -{ - BUG_ON(val & INETPEER_BASE_BIT); - return (struct inet_peer *) val; -} +void inet_peer_base_init(struct inet_peer_base *); -static inline struct inet_peer_base *inetpeer_base_ptr(unsigned long val) -{ - if (!(val & INETPEER_BASE_BIT)) - return NULL; - val &= ~INETPEER_BASE_BIT; - return (struct inet_peer_base *) val; -} +void inet_initpeers(void) __init; -static inline bool inetpeer_ptr_is_peer(unsigned long val) -{ - return !(val & INETPEER_BASE_BIT); -} +#define INETPEER_METRICS_NEW (~(u32) 0) -static inline void __inetpeer_ptr_set_peer(unsigned long *val, struct inet_peer *peer) +static inline void inetpeer_set_addr_v4(struct inetpeer_addr *iaddr, __be32 ip) { - /* This implicitly clears INETPEER_BASE_BIT */ - *val = (unsigned long) peer; + iaddr->a4.addr = ip; + iaddr->family = AF_INET; } -static inline bool inetpeer_ptr_set_peer(unsigned long *ptr, struct inet_peer *peer) +static inline __be32 inetpeer_get_addr_v4(struct inetpeer_addr *iaddr) { - unsigned long val = (unsigned long) peer; - unsigned long orig = *ptr; - - if (!(orig & INETPEER_BASE_BIT) || - cmpxchg(ptr, orig, val) != orig) - return false; - return true; + return iaddr->a4.addr; } -static inline void inetpeer_init_ptr(unsigned long *ptr, struct inet_peer_base *base) +static inline void inetpeer_set_addr_v6(struct inetpeer_addr *iaddr, + struct in6_addr *in6) { - *ptr = (unsigned long) base | INETPEER_BASE_BIT; + iaddr->a6 = *in6; + iaddr->family = AF_INET6; } -static inline void inetpeer_transfer_peer(unsigned long *to, unsigned long *from) +static inline struct in6_addr *inetpeer_get_addr_v6(struct inetpeer_addr *iaddr) { - unsigned long val = *from; - - *to = val; - if (inetpeer_ptr_is_peer(val)) { - struct inet_peer *peer = inetpeer_ptr(val); - atomic_inc(&peer->refcnt); - } -} - -void inet_peer_base_init(struct inet_peer_base *); - -void inet_initpeers(void) __init; - -#define INETPEER_METRICS_NEW (~(u32) 0) - -static inline bool inet_metrics_new(const struct inet_peer *p) -{ - return p->metrics[RTAX_LOCK-1] == INETPEER_METRICS_NEW; + return &iaddr->a6; } /* can be called with or without local BH being disabled */ @@ -137,11 +105,12 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base, static inline struct inet_peer *inet_getpeer_v4(struct inet_peer_base *base, __be32 v4daddr, - int create) + int vif, int create) { struct inetpeer_addr daddr; - daddr.addr.a4 = v4daddr; + daddr.a4.addr = v4daddr; + daddr.a4.vif = vif; daddr.family = AF_INET; return inet_getpeer(base, &daddr, create); } @@ -152,23 +121,36 @@ static inline struct inet_peer *inet_getpeer_v6(struct inet_peer_base *base, { struct inetpeer_addr daddr; - daddr.addr.in6 = *v6daddr; + daddr.a6 = *v6daddr; daddr.family = AF_INET6; return inet_getpeer(base, &daddr, create); } +static inline int inetpeer_addr_cmp(const struct inetpeer_addr *a, + const struct inetpeer_addr *b) +{ + int i, n; + + if (a->family == AF_INET) + n = sizeof(a->a4) / sizeof(u32); + else + n = sizeof(a->a6) / sizeof(u32); + + for (i = 0; i < n; i++) { + if (a->key[i] == b->key[i]) + continue; + if (a->key[i] < b->key[i]) + return -1; + return 1; + } + + return 0; +} + /* can be called from BH context or outside */ void inet_putpeer(struct inet_peer *p); bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout); void inetpeer_invalidate_tree(struct inet_peer_base *); -/* - * temporary check to make sure we dont access rid, tcp_ts, - * tcp_ts_stamp if no refcount is taken on inet_peer - */ -static inline void inet_peer_refcheck(const struct inet_peer *p) -{ - WARN_ON_ONCE(atomic_read(&p->refcnt) <= 0); -} #endif /* _NET_INETPEER_H */ diff --git a/include/net/ip.h b/include/net/ip.h index d5fe9f2ab..9b9ca2839 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -202,10 +202,20 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, #define NET_ADD_STATS_BH(net, field, adnd) SNMP_ADD_STATS_BH((net)->mib.net_statistics, field, adnd) #define NET_ADD_STATS_USER(net, field, adnd) SNMP_ADD_STATS_USER((net)->mib.net_statistics, field, adnd) +u64 snmp_get_cpu_field(void __percpu *mib, int cpu, int offct); unsigned long snmp_fold_field(void __percpu *mib, int offt); #if BITS_PER_LONG==32 +u64 snmp_get_cpu_field64(void __percpu *mib, int cpu, int offct, + size_t syncp_offset); u64 snmp_fold_field64(void __percpu *mib, int offt, size_t sync_off); #else +static inline u64 snmp_get_cpu_field64(void __percpu *mib, int cpu, int offct, + size_t syncp_offset) +{ + return snmp_get_cpu_field(mib, cpu, offct); + +} + static inline u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_off) { return snmp_fold_field(mib, offt); @@ -370,22 +380,6 @@ static inline void iph_to_flow_copy_v4addrs(struct flow_keys *flow, flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; } -static inline void inet_set_txhash(struct sock *sk) -{ - struct inet_sock *inet = inet_sk(sk); - struct flow_keys keys; - - memset(&keys, 0, sizeof(keys)); - - keys.addrs.v4addrs.src = inet->inet_saddr; - keys.addrs.v4addrs.dst = inet->inet_daddr; - keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; - keys.ports.src = inet->inet_sport; - keys.ports.dst = inet->inet_dport; - - sk->sk_txhash = flow_hash_from_keys(&keys); -} - static inline __wsum inet_gro_compute_pseudo(struct sk_buff *skb, int proto) { const struct iphdr *iph = skb_gro_network_header(skb); @@ -474,6 +468,11 @@ static __inline__ void inet_reset_saddr(struct sock *sk) #endif +static inline unsigned int ipv4_addr_hash(__be32 ip) +{ + return (__force unsigned int) ip; +} + bool ip_call_ra_chain(struct sk_buff *skb); /* diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 3b76849c1..aaf9700fc 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -51,6 +51,8 @@ struct fib6_config { struct nlattr *fc_mp; struct nl_info fc_nlinfo; + struct nlattr *fc_encap; + u16 fc_encap_type; }; struct fib6_node { @@ -273,7 +275,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info, struct mx6_config *mxc); int fib6_del(struct rt6_info *rt, struct nl_info *info); -void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info); +void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info, + unsigned int flags); void fib6_run_gc(unsigned long expires, struct net *net, bool force); diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index b8529aa1d..fa915fa0f 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -32,6 +32,12 @@ struct __ip6_tnl_parm { __be32 o_key; }; +struct ip6_tnl_dst { + seqlock_t lock; + struct dst_entry __rcu *dst; + u32 cookie; +}; + /* IPv6 tunnel */ struct ip6_tnl { struct ip6_tnl __rcu *next; /* next tunnel in list */ @@ -39,8 +45,7 @@ struct ip6_tnl { struct net *net; /* netns for packet i/o */ struct __ip6_tnl_parm parms; /* tunnel configuration parameters */ struct flowi fl; /* flowi template for xmit */ - struct dst_entry *dst_cache; /* cached dst */ - u32 dst_cookie; + struct ip6_tnl_dst __percpu *dst_cache; /* cached dst */ int err_count; unsigned long err_time; @@ -60,9 +65,11 @@ struct ipv6_tlv_tnl_enc_lim { __u8 encap_limit; /* tunnel encapsulation limit */ } __packed; -struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t); +struct dst_entry *ip6_tnl_dst_get(struct ip6_tnl *t); +int ip6_tnl_dst_init(struct ip6_tnl *t); +void ip6_tnl_dst_destroy(struct ip6_tnl *t); void ip6_tnl_dst_reset(struct ip6_tnl *t); -void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst); +void ip6_tnl_dst_set(struct ip6_tnl *t, struct dst_entry *dst); int ip6_tnl_rcv_ctl(struct ip6_tnl *t, const struct in6_addr *laddr, const struct in6_addr *raddr); int ip6_tnl_xmit_ctl(struct ip6_tnl *t, const struct in6_addr *laddr, @@ -79,7 +86,7 @@ static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb, struct net_device_stats *stats = &dev->stats; int pkt_len, err; - pkt_len = skb->len; + pkt_len = skb->len - skb_inner_network_offset(skb); err = ip6_local_out_sk(sk, skb); if (net_xmit_eval(err) == 0) { diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 5fa643b4e..965fa5b1a 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -44,7 +44,9 @@ struct fib_config { u32 fc_flow; u32 fc_nlflags; struct nl_info fc_nlinfo; - }; + struct nlattr *fc_encap; + u16 fc_encap_type; +}; struct fib_info; struct rtable; @@ -89,6 +91,7 @@ struct fib_nh { struct rtable __rcu * __percpu *nh_pcpu_rth_output; struct rtable __rcu *nh_rth_input; struct fnhe_hash_bucket __rcu *nh_exceptions; + struct lwtunnel_state *nh_lwtstate; }; /* @@ -233,8 +236,11 @@ static inline int fib_lookup(struct net *net, const struct flowi4 *flp, rcu_read_lock(); tb = fib_get_table(net, RT_TABLE_MAIN); - if (tb && !fib_table_lookup(tb, flp, res, flags | FIB_LOOKUP_NOREF)) - err = 0; + if (tb) + err = fib_table_lookup(tb, flp, res, flags | FIB_LOOKUP_NOREF); + + if (err == -EAGAIN) + err = -ENETUNREACH; rcu_read_unlock(); @@ -255,7 +261,7 @@ static inline int fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res, unsigned int flags) { struct fib_table *tb; - int err; + int err = -ENETUNREACH; flags |= FIB_LOOKUP_NOREF; if (net->ipv4.fib_has_custom_rules) @@ -265,15 +271,20 @@ static inline int fib_lookup(struct net *net, struct flowi4 *flp, res->tclassid = 0; - for (err = 0; !err; err = -ENETUNREACH) { - tb = rcu_dereference_rtnl(net->ipv4.fib_main); - if (tb && !fib_table_lookup(tb, flp, res, flags)) - break; + tb = rcu_dereference_rtnl(net->ipv4.fib_main); + if (tb) + err = fib_table_lookup(tb, flp, res, flags); + + if (!err) + goto out; + + tb = rcu_dereference_rtnl(net->ipv4.fib_default); + if (tb) + err = fib_table_lookup(tb, flp, res, flags); - tb = rcu_dereference_rtnl(net->ipv4.fib_default); - if (tb && !fib_table_lookup(tb, flp, res, flags)) - break; - } +out: + if (err == -EAGAIN) + err = -ENETUNREACH; rcu_read_unlock(); @@ -306,7 +317,7 @@ void fib_flush_external(struct net *net); /* Exported by fib_semantics.c */ int ip_fib_check_default(__be32 gw, struct net_device *dev); -int fib_sync_down_dev(struct net_device *dev, unsigned long event); +int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force); int fib_sync_down_addr(struct net *net, __be32 local); int fib_sync_up(struct net_device *dev, unsigned int nh_flags); void fib_select_multipath(struct fib_result *res); diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index d8214cb88..f6dafec91 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -4,14 +4,15 @@ #include <linux/if_tunnel.h> #include <linux/netdevice.h> #include <linux/skbuff.h> +#include <linux/socket.h> #include <linux/types.h> #include <linux/u64_stats_sync.h> #include <net/dsfield.h> #include <net/gro_cells.h> #include <net/inet_ecn.h> -#include <net/ip.h> #include <net/netns/generic.h> #include <net/rtnetlink.h> +#include <net/lwtunnel.h> #if IS_ENABLED(CONFIG_IPV6) #include <net/ipv6.h> @@ -22,6 +23,44 @@ /* Keep error state on tunnel for 30 sec */ #define IPTUNNEL_ERR_TIMEO (30*HZ) +/* Used to memset ip_tunnel padding. */ +#define IP_TUNNEL_KEY_SIZE offsetofend(struct ip_tunnel_key, tp_dst) + +/* Used to memset ipv4 address padding. */ +#define IP_TUNNEL_KEY_IPV4_PAD offsetofend(struct ip_tunnel_key, u.ipv4.dst) +#define IP_TUNNEL_KEY_IPV4_PAD_LEN \ + (FIELD_SIZEOF(struct ip_tunnel_key, u) - \ + FIELD_SIZEOF(struct ip_tunnel_key, u.ipv4)) + +struct ip_tunnel_key { + __be64 tun_id; + union { + struct { + __be32 src; + __be32 dst; + } ipv4; + struct { + struct in6_addr src; + struct in6_addr dst; + } ipv6; + } u; + __be16 tun_flags; + u8 tos; /* TOS for IPv4, TC for IPv6 */ + u8 ttl; /* TTL for IPv4, HL for IPv6 */ + __be16 tp_src; + __be16 tp_dst; +}; + +/* Flags for ip_tunnel_info mode. */ +#define IP_TUNNEL_INFO_TX 0x01 /* represents tx tunnel parameters */ +#define IP_TUNNEL_INFO_IPV6 0x02 /* key contains IPv6 addresses */ + +struct ip_tunnel_info { + struct ip_tunnel_key key; + u8 options_len; + u8 mode; +}; + /* 6rd prefix/relay information */ #ifdef CONFIG_IPV6_SIT_6RD struct ip_tunnel_6rd_parm { @@ -33,8 +72,8 @@ struct ip_tunnel_6rd_parm { #endif struct ip_tunnel_encap { - __u16 type; - __u16 flags; + u16 type; + u16 flags; __be16 sport; __be16 dport; }; @@ -51,6 +90,8 @@ struct ip_tunnel_dst { __be32 saddr; }; +struct metadata_dst; + struct ip_tunnel { struct ip_tunnel __rcu *next; struct hlist_node hash_node; @@ -62,8 +103,8 @@ struct ip_tunnel { * arrived */ /* These four fields used only by GRE */ - __u32 i_seqno; /* The last seen seqno */ - __u32 o_seqno; /* The last output seqno */ + u32 i_seqno; /* The last seen seqno */ + u32 o_seqno; /* The last output seqno */ int tun_hlen; /* Precalculated header length */ int mlink; @@ -84,6 +125,7 @@ struct ip_tunnel { unsigned int prl_count; /* # of entries in PRL */ int ip_tnl_net_id; struct gro_cells gro_cells; + bool collect_md; }; #define TUNNEL_CSUM __cpu_to_be16(0x01) @@ -118,6 +160,7 @@ struct tnl_ptk_info { struct ip_tunnel_net { struct net_device *fb_tunnel_dev; struct hlist_head tunnels[IP_TNL_HASH_SIZE]; + struct ip_tunnel __rcu *collect_md_tun; }; struct ip_tunnel_encap_ops { @@ -136,6 +179,40 @@ int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *op, int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *op, unsigned int num); +static inline void ip_tunnel_key_init(struct ip_tunnel_key *key, + __be32 saddr, __be32 daddr, + u8 tos, u8 ttl, + __be16 tp_src, __be16 tp_dst, + __be64 tun_id, __be16 tun_flags) +{ + key->tun_id = tun_id; + key->u.ipv4.src = saddr; + key->u.ipv4.dst = daddr; + memset((unsigned char *)key + IP_TUNNEL_KEY_IPV4_PAD, + 0, IP_TUNNEL_KEY_IPV4_PAD_LEN); + key->tos = tos; + key->ttl = ttl; + key->tun_flags = tun_flags; + + /* For the tunnel types on the top of IPsec, the tp_src and tp_dst of + * the upper tunnel are used. + * E.g: GRE over IPSEC, the tp_src and tp_port are zero. + */ + key->tp_src = tp_src; + key->tp_dst = tp_dst; + + /* Clear struct padding. */ + if (sizeof(*key) != IP_TUNNEL_KEY_SIZE) + memset((unsigned char *)key + IP_TUNNEL_KEY_SIZE, + 0, sizeof(*key) - IP_TUNNEL_KEY_SIZE); +} + +static inline unsigned short ip_tunnel_info_af(const struct ip_tunnel_info + *tun_info) +{ + return tun_info->mode & IP_TUNNEL_INFO_IPV6 ? AF_INET6 : AF_INET; +} + #ifdef CONFIG_INET int ip_tunnel_init(struct net_device *dev); @@ -163,7 +240,8 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, __be32 key); int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, - const struct tnl_ptk_info *tpi, bool log_ecn_error); + const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst, + bool log_ecn_error); int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], struct ip_tunnel_parm *p); int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], @@ -196,8 +274,10 @@ static inline u8 ip_tunnel_ecn_encap(u8 tos, const struct iphdr *iph, int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto); int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, - __be32 src, __be32 dst, __u8 proto, - __u8 tos, __u8 ttl, __be16 df, bool xnet); + __be32 src, __be32 dst, u8 proto, + u8 tos, u8 ttl, __be16 df, bool xnet); +struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md, + gfp_t flags); struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, bool gre_csum, int gso_type_mask); @@ -221,6 +301,57 @@ static inline void iptunnel_xmit_stats(int err, } } +static inline void *ip_tunnel_info_opts(struct ip_tunnel_info *info) +{ + return info + 1; +} + +static inline void ip_tunnel_info_opts_get(void *to, + const struct ip_tunnel_info *info) +{ + memcpy(to, info + 1, info->options_len); +} + +static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info, + const void *from, int len) +{ + memcpy(ip_tunnel_info_opts(info), from, len); + info->options_len = len; +} + +static inline struct ip_tunnel_info *lwt_tun_info(struct lwtunnel_state *lwtstate) +{ + return (struct ip_tunnel_info *)lwtstate->data; +} + +extern struct static_key ip_tunnel_metadata_cnt; + +/* Returns > 0 if metadata should be collected */ +static inline int ip_tunnel_collect_metadata(void) +{ + return static_key_false(&ip_tunnel_metadata_cnt); +} + +void __init ip_tunnel_core_init(void); + +void ip_tunnel_need_metadata(void); +void ip_tunnel_unneed_metadata(void); + +#else /* CONFIG_INET */ + +static inline struct ip_tunnel_info *lwt_tun_info(struct lwtunnel_state *lwtstate) +{ + return NULL; +} + +static inline void ip_tunnel_need_metadata(void) +{ +} + +static inline void ip_tunnel_unneed_metadata(void) +{ +} + #endif /* CONFIG_INET */ #endif /* __NET_IP_TUNNELS_H */ diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 4e3731ee4..9b9ca87a4 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -846,6 +846,17 @@ struct ipvs_master_sync_state { /* How much time to keep dests in trash */ #define IP_VS_DEST_TRASH_PERIOD (120 * HZ) +struct ipvs_sync_daemon_cfg { + union nf_inet_addr mcast_group; + int syncid; + u16 sync_maxlen; + u16 mcast_port; + u8 mcast_af; + u8 mcast_ttl; + /* multicast interface name */ + char mcast_ifn[IP_VS_IFNAME_MAXLEN]; +}; + /* IPVS in network namespace */ struct netns_ipvs { int gen; /* Generation */ @@ -961,15 +972,10 @@ struct netns_ipvs { spinlock_t sync_buff_lock; struct task_struct **backup_threads; int threads_mask; - int send_mesg_maxlen; - int recv_mesg_maxlen; volatile int sync_state; - volatile int master_syncid; - volatile int backup_syncid; struct mutex sync_mutex; - /* multicast interface name */ - char master_mcast_ifn[IP_VS_IFNAME_MAXLEN]; - char backup_mcast_ifn[IP_VS_IFNAME_MAXLEN]; + struct ipvs_sync_daemon_cfg mcfg; /* Master Configuration */ + struct ipvs_sync_daemon_cfg bcfg; /* Backup Configuration */ /* net name space ptr */ struct net *net; /* Needed by timer routines */ /* Number of heterogeneous destinations, needed becaus heterogeneous @@ -1408,7 +1414,8 @@ static inline void ip_vs_dest_put_and_free(struct ip_vs_dest *dest) /* IPVS sync daemon data and function prototypes * (from ip_vs_sync.c) */ -int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid); +int start_sync_thread(struct net *net, struct ipvs_sync_daemon_cfg *cfg, + int state); int stop_sync_thread(struct net *net, int state); void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 82dbdb092..711cca428 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -707,54 +707,69 @@ static inline void iph_to_flow_copy_v6addrs(struct flow_keys *flow, } #if IS_ENABLED(CONFIG_IPV6) -static inline void ip6_set_txhash(struct sock *sk) -{ - struct inet_sock *inet = inet_sk(sk); - struct ipv6_pinfo *np = inet6_sk(sk); - struct flow_keys keys; - memset(&keys, 0, sizeof(keys)); +/* Sysctl settings for net ipv6.auto_flowlabels */ +#define IP6_AUTO_FLOW_LABEL_OFF 0 +#define IP6_AUTO_FLOW_LABEL_OPTOUT 1 +#define IP6_AUTO_FLOW_LABEL_OPTIN 2 +#define IP6_AUTO_FLOW_LABEL_FORCED 3 - memcpy(&keys.addrs.v6addrs.src, &np->saddr, - sizeof(keys.addrs.v6addrs.src)); - memcpy(&keys.addrs.v6addrs.dst, &sk->sk_v6_daddr, - sizeof(keys.addrs.v6addrs.dst)); - keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; - keys.ports.src = inet->inet_sport; - keys.ports.dst = inet->inet_dport; +#define IP6_AUTO_FLOW_LABEL_MAX IP6_AUTO_FLOW_LABEL_FORCED - sk->sk_txhash = flow_hash_from_keys(&keys); -} +#define IP6_DEFAULT_AUTO_FLOW_LABELS IP6_AUTO_FLOW_LABEL_OPTOUT static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb, - __be32 flowlabel, bool autolabel) + __be32 flowlabel, bool autolabel, + struct flowi6 *fl6) { - if (!flowlabel && (autolabel || net->ipv6.sysctl.auto_flowlabels)) { - u32 hash; + u32 hash; + + if (flowlabel || + net->ipv6.sysctl.auto_flowlabels == IP6_AUTO_FLOW_LABEL_OFF || + (!autolabel && + net->ipv6.sysctl.auto_flowlabels != IP6_AUTO_FLOW_LABEL_FORCED)) + return flowlabel; - hash = skb_get_hash(skb); + hash = skb_get_hash_flowi6(skb, fl6); - /* Since this is being sent on the wire obfuscate hash a bit - * to minimize possbility that any useful information to an - * attacker is leaked. Only lower 20 bits are relevant. - */ - hash ^= hash >> 12; + /* Since this is being sent on the wire obfuscate hash a bit + * to minimize possbility that any useful information to an + * attacker is leaked. Only lower 20 bits are relevant. + */ + rol32(hash, 16); - flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK; + flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK; - if (net->ipv6.sysctl.flowlabel_state_ranges) - flowlabel |= IPV6_FLOWLABEL_STATELESS_FLAG; - } + if (net->ipv6.sysctl.flowlabel_state_ranges) + flowlabel |= IPV6_FLOWLABEL_STATELESS_FLAG; return flowlabel; } + +static inline int ip6_default_np_autolabel(struct net *net) +{ + switch (net->ipv6.sysctl.auto_flowlabels) { + case IP6_AUTO_FLOW_LABEL_OFF: + case IP6_AUTO_FLOW_LABEL_OPTIN: + default: + return 0; + case IP6_AUTO_FLOW_LABEL_OPTOUT: + case IP6_AUTO_FLOW_LABEL_FORCED: + return 1; + } +} #else static inline void ip6_set_txhash(struct sock *sk) { } static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb, - __be32 flowlabel, bool autolabel) + __be32 flowlabel, bool autolabel, + struct flowi6 *fl6) { return flowlabel; } +static inline int ip6_default_np_autolabel(struct net *net) +{ + return 0; +} #endif @@ -832,7 +847,8 @@ static inline struct sk_buff *ip6_finish_skb(struct sock *sk) &inet6_sk(sk)->cork); } -int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6); +int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst, + struct flowi6 *fl6); struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, const struct in6_addr *final_dst); struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h new file mode 100644 index 000000000..fce0e35e7 --- /dev/null +++ b/include/net/lwtunnel.h @@ -0,0 +1,175 @@ +#ifndef __NET_LWTUNNEL_H +#define __NET_LWTUNNEL_H 1 + +#include <linux/lwtunnel.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <linux/types.h> +#include <net/route.h> + +#define LWTUNNEL_HASH_BITS 7 +#define LWTUNNEL_HASH_SIZE (1 << LWTUNNEL_HASH_BITS) + +/* lw tunnel state flags */ +#define LWTUNNEL_STATE_OUTPUT_REDIRECT BIT(0) +#define LWTUNNEL_STATE_INPUT_REDIRECT BIT(1) + +struct lwtunnel_state { + __u16 type; + __u16 flags; + atomic_t refcnt; + int (*orig_output)(struct sock *sk, struct sk_buff *skb); + int (*orig_input)(struct sk_buff *); + int len; + __u8 data[0]; +}; + +struct lwtunnel_encap_ops { + int (*build_state)(struct net_device *dev, struct nlattr *encap, + unsigned int family, const void *cfg, + struct lwtunnel_state **ts); + int (*output)(struct sock *sk, struct sk_buff *skb); + int (*input)(struct sk_buff *skb); + int (*fill_encap)(struct sk_buff *skb, + struct lwtunnel_state *lwtstate); + int (*get_encap_size)(struct lwtunnel_state *lwtstate); + int (*cmp_encap)(struct lwtunnel_state *a, struct lwtunnel_state *b); +}; + +#ifdef CONFIG_LWTUNNEL +static inline void lwtstate_free(struct lwtunnel_state *lws) +{ + kfree(lws); +} + +static inline struct lwtunnel_state * +lwtstate_get(struct lwtunnel_state *lws) +{ + if (lws) + atomic_inc(&lws->refcnt); + + return lws; +} + +static inline void lwtstate_put(struct lwtunnel_state *lws) +{ + if (!lws) + return; + + if (atomic_dec_and_test(&lws->refcnt)) + lwtstate_free(lws); +} + +static inline bool lwtunnel_output_redirect(struct lwtunnel_state *lwtstate) +{ + if (lwtstate && (lwtstate->flags & LWTUNNEL_STATE_OUTPUT_REDIRECT)) + return true; + + return false; +} + +static inline bool lwtunnel_input_redirect(struct lwtunnel_state *lwtstate) +{ + if (lwtstate && (lwtstate->flags & LWTUNNEL_STATE_INPUT_REDIRECT)) + return true; + + return false; +} +int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op, + unsigned int num); +int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op, + unsigned int num); +int lwtunnel_build_state(struct net_device *dev, u16 encap_type, + struct nlattr *encap, + unsigned int family, const void *cfg, + struct lwtunnel_state **lws); +int lwtunnel_fill_encap(struct sk_buff *skb, + struct lwtunnel_state *lwtstate); +int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate); +struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len); +int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b); +int lwtunnel_output(struct sock *sk, struct sk_buff *skb); +int lwtunnel_input(struct sk_buff *skb); + +#else + +static inline void lwtstate_free(struct lwtunnel_state *lws) +{ +} + +static inline struct lwtunnel_state * +lwtstate_get(struct lwtunnel_state *lws) +{ + return lws; +} + +static inline void lwtstate_put(struct lwtunnel_state *lws) +{ +} + +static inline bool lwtunnel_output_redirect(struct lwtunnel_state *lwtstate) +{ + return false; +} + +static inline bool lwtunnel_input_redirect(struct lwtunnel_state *lwtstate) +{ + return false; +} + +static inline int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op, + unsigned int num) +{ + return -EOPNOTSUPP; + +} + +static inline int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op, + unsigned int num) +{ + return -EOPNOTSUPP; +} + +static inline int lwtunnel_build_state(struct net_device *dev, u16 encap_type, + struct nlattr *encap, + unsigned int family, const void *cfg, + struct lwtunnel_state **lws) +{ + return -EOPNOTSUPP; +} + +static inline int lwtunnel_fill_encap(struct sk_buff *skb, + struct lwtunnel_state *lwtstate) +{ + return 0; +} + +static inline int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate) +{ + return 0; +} + +static inline struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len) +{ + return NULL; +} + +static inline int lwtunnel_cmp_encap(struct lwtunnel_state *a, + struct lwtunnel_state *b) +{ + return 0; +} + +static inline int lwtunnel_output(struct sock *sk, struct sk_buff *skb) +{ + return -EOPNOTSUPP; +} + +static inline int lwtunnel_input(struct sk_buff *skb) +{ + return -EOPNOTSUPP; +} + +#endif + +#endif /* __NET_LWTUNNEL_H */ diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 6b1077c2a..bfc569498 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -477,7 +477,9 @@ struct ieee80211_event { * @chandef: Channel definition for this BSS -- the hardware might be * configured a higher bandwidth than this BSS uses, for example. * @ht_operation_mode: HT operation mode like in &struct ieee80211_ht_operation. - * This field is only valid when the channel type is one of the HT types. + * This field is only valid when the channel is a wide HT/VHT channel. + * Note that with TDLS this can be the case (channel is HT, protection must + * be used from this field) even when the BSS association isn't using HT. * @cqm_rssi_thold: Connection quality monitor RSSI threshold, a zero value * implies disabled * @cqm_rssi_hyst: Connection quality monitor RSSI hysteresis @@ -973,6 +975,10 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) * @RX_FLAG_IV_STRIPPED: The IV/ICV are stripped from this frame. * If this flag is set, the stack cannot do any replay detection * hence the driver or hardware will have to do that. + * @RX_FLAG_PN_VALIDATED: Currently only valid for CCMP/GCMP frames, this + * flag indicates that the PN was verified for replay protection. + * Note that this flag is also currently only supported when a frame + * is also decrypted (ie. @RX_FLAG_DECRYPTED must be set) * @RX_FLAG_FAILED_FCS_CRC: Set this flag if the FCS check failed on * the frame. * @RX_FLAG_FAILED_PLCP_CRC: Set this flag if the PCLP check failed on @@ -997,9 +1003,6 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) * @RX_FLAG_AMPDU_DETAILS: A-MPDU details are known, in particular the reference * number (@ampdu_reference) must be populated and be a distinct number for * each A-MPDU - * @RX_FLAG_AMPDU_REPORT_ZEROLEN: driver reports 0-length subframes - * @RX_FLAG_AMPDU_IS_ZEROLEN: This is a zero-length subframe, for - * monitoring purposes only * @RX_FLAG_AMPDU_LAST_KNOWN: last subframe is known, should be set on all * subframes of a single A-MPDU * @RX_FLAG_AMPDU_IS_LAST: this subframe is the last subframe of the A-MPDU @@ -1039,8 +1042,8 @@ enum mac80211_rx_flags { RX_FLAG_NO_SIGNAL_VAL = BIT(12), RX_FLAG_HT_GF = BIT(13), RX_FLAG_AMPDU_DETAILS = BIT(14), - RX_FLAG_AMPDU_REPORT_ZEROLEN = BIT(15), - RX_FLAG_AMPDU_IS_ZEROLEN = BIT(16), + RX_FLAG_PN_VALIDATED = BIT(15), + /* bit 16 free */ RX_FLAG_AMPDU_LAST_KNOWN = BIT(17), RX_FLAG_AMPDU_IS_LAST = BIT(18), RX_FLAG_AMPDU_DELIM_CRC_ERROR = BIT(19), @@ -1491,8 +1494,10 @@ enum ieee80211_key_flags { * - Temporal Authenticator Rx MIC Key (64 bits) * @icv_len: The ICV length for this key type * @iv_len: The IV length for this key type + * @drv_priv: pointer for driver use */ struct ieee80211_key_conf { + void *drv_priv; atomic64_t tx_pn; u32 cipher; u8 icv_len; @@ -1675,7 +1680,6 @@ struct ieee80211_sta_rates { * @tdls: indicates whether the STA is a TDLS peer * @tdls_initiator: indicates the STA is an initiator of the TDLS link. Only * valid if the STA is a TDLS peer in the first place. - * @mfp: indicates whether the STA uses management frame protection or not. * @txq: per-TID data TX queues (if driver uses the TXQ abstraction) */ struct ieee80211_sta { @@ -1693,7 +1697,6 @@ struct ieee80211_sta { struct ieee80211_sta_rates __rcu *rates; bool tdls; bool tdls_initiator; - bool mfp; struct ieee80211_txq *txq[IEEE80211_NUM_TIDS]; @@ -1888,6 +1891,9 @@ struct ieee80211_txq { * @IEEE80211_HW_SINGLE_SCAN_ON_ALL_BANDS: The HW supports scanning on all bands * in one command, mac80211 doesn't have to run separate scans per band. * + * @IEEE80211_HW_TDLS_WIDER_BW: The device/driver supports wider bandwidth + * than then BSS bandwidth for a TDLS link on the base channel. + * * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays */ enum ieee80211_hw_flags { @@ -1920,6 +1926,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_CHANCTX_STA_CSA, IEEE80211_HW_SUPPORTS_CLONED_SKBS, IEEE80211_HW_SINGLE_SCAN_ON_ALL_BANDS, + IEEE80211_HW_TDLS_WIDER_BW, /* keep last, obviously */ NUM_IEEE80211_HW_FLAGS @@ -3696,20 +3703,28 @@ void ieee80211_free_hw(struct ieee80211_hw *hw); void ieee80211_restart_hw(struct ieee80211_hw *hw); /** - * ieee80211_napi_add - initialize mac80211 NAPI context - * @hw: the hardware to initialize the NAPI context on - * @napi: the NAPI context to initialize - * @napi_dev: dummy NAPI netdevice, here to not waste the space if the - * driver doesn't use NAPI - * @poll: poll function - * @weight: default weight + * ieee80211_rx_napi - receive frame from NAPI context + * + * Use this function to hand received frames to mac80211. The receive + * buffer in @skb must start with an IEEE 802.11 header. In case of a + * paged @skb is used, the driver is recommended to put the ieee80211 + * header of the frame on the linear part of the @skb to avoid memory + * allocation and/or memcpy by the stack. + * + * This function may not be called in IRQ context. Calls to this function + * for a single hardware must be synchronized against each other. Calls to + * this function, ieee80211_rx_ni() and ieee80211_rx_irqsafe() may not be + * mixed for a single hardware. Must not run concurrently with + * ieee80211_tx_status() or ieee80211_tx_status_ni(). + * + * This function must be called with BHs disabled. * - * See also netif_napi_add(). + * @hw: the hardware this frame came in on + * @skb: the buffer to receive, owned by mac80211 after this call + * @napi: the NAPI context */ -void ieee80211_napi_add(struct ieee80211_hw *hw, struct napi_struct *napi, - struct net_device *napi_dev, - int (*poll)(struct napi_struct *, int), - int weight); +void ieee80211_rx_napi(struct ieee80211_hw *hw, struct sk_buff *skb, + struct napi_struct *napi); /** * ieee80211_rx - receive frame @@ -3731,7 +3746,10 @@ void ieee80211_napi_add(struct ieee80211_hw *hw, struct napi_struct *napi, * @hw: the hardware this frame came in on * @skb: the buffer to receive, owned by mac80211 after this call */ -void ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb); +static inline void ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb) +{ + ieee80211_rx_napi(hw, skb, NULL); +} /** * ieee80211_rx_irqsafe - receive frame @@ -4315,19 +4333,6 @@ void ieee80211_get_tkip_p2k(struct ieee80211_key_conf *keyconf, struct sk_buff *skb, u8 *p2k); /** - * ieee80211_aes_cmac_calculate_k1_k2 - calculate the AES-CMAC sub keys - * - * This function computes the two AES-CMAC sub-keys, based on the - * previously installed master key. - * - * @keyconf: the parameter passed with the set key - * @k1: a buffer to be filled with the 1st sub-key - * @k2: a buffer to be filled with the 2nd sub-key - */ -void ieee80211_aes_cmac_calculate_k1_k2(struct ieee80211_key_conf *keyconf, - u8 *k1, u8 *k2); - -/** * ieee80211_get_key_tx_seq - get key TX sequence counter * * @keyconf: the parameter passed with the set key diff --git a/include/net/mac802154.h b/include/net/mac802154.h index f534a4691..b7f996152 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -321,23 +321,6 @@ int ieee802154_register_hw(struct ieee802154_hw *hw); void ieee802154_unregister_hw(struct ieee802154_hw *hw); /** - * ieee802154_rx - receive frame - * - * Use this function to hand received frames to mac802154. The receive - * buffer in @skb must start with an IEEE 802.15.4 header. In case of a - * paged @skb is used, the driver is recommended to put the ieee802154 - * header of the frame on the linear part of the @skb to avoid memory - * allocation and/or memcpy by the stack. - * - * This function may not be called in IRQ context. Calls to this function - * for a single hardware must be synchronized against each other. - * - * @hw: the hardware this frame came in on - * @skb: the buffer to receive, owned by mac802154 after this call - */ -void ieee802154_rx(struct ieee802154_hw *hw, struct sk_buff *skb); - -/** * ieee802154_rx_irqsafe - receive frame * * Like ieee802154_rx() but can be called in IRQ context diff --git a/include/net/mpls_iptunnel.h b/include/net/mpls_iptunnel.h new file mode 100644 index 000000000..4757997f7 --- /dev/null +++ b/include/net/mpls_iptunnel.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2015 Cumulus Networks, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#ifndef _NET_MPLS_IPTUNNEL_H +#define _NET_MPLS_IPTUNNEL_H 1 + +#define MAX_NEW_LABELS 2 + +struct mpls_iptunnel_encap { + u32 label[MAX_NEW_LABELS]; + u32 labels; +}; + +static inline struct mpls_iptunnel_encap *mpls_lwtunnel_encap(struct lwtunnel_state *lwtstate) +{ + return (struct mpls_iptunnel_encap *)lwtstate->data; +} + +#endif diff --git a/include/net/ndisc.h b/include/net/ndisc.h index b3a775125..aba5695fa 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -182,7 +182,8 @@ int ndisc_rcv(struct sk_buff *skb); void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh, const struct in6_addr *solicit, - const struct in6_addr *daddr, const struct in6_addr *saddr); + const struct in6_addr *daddr, const struct in6_addr *saddr, + struct sk_buff *oskb); void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr, const struct in6_addr *daddr); diff --git a/include/net/neighbour.h b/include/net/neighbour.h index bd33e66f4..8b683841e 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -125,6 +125,7 @@ struct neigh_statistics { unsigned long forced_gc_runs; /* number of forced GC runs */ unsigned long unres_discards; /* number of unresolved drops */ + unsigned long table_fulls; /* times even gc couldn't help */ }; #define NEIGH_CACHE_STAT_INC(tbl, field) this_cpu_inc((tbl)->stats->field) diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index e951453e0..2dcea635e 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -118,6 +118,9 @@ struct net { #endif struct sock *nfnl; struct sock *nfnl_stash; +#if IS_ENABLED(CONFIG_NETFILTER_NETLINK_ACCT) + struct list_head nfnl_acct_list; +#endif #endif #ifdef CONFIG_WEXT_CORE struct sk_buff_head wext_nlevents; diff --git a/include/net/netfilter/ipv4/nf_dup_ipv4.h b/include/net/netfilter/ipv4/nf_dup_ipv4.h new file mode 100644 index 000000000..42008f10d --- /dev/null +++ b/include/net/netfilter/ipv4/nf_dup_ipv4.h @@ -0,0 +1,7 @@ +#ifndef _NF_DUP_IPV4_H_ +#define _NF_DUP_IPV4_H_ + +void nf_dup_ipv4(struct sk_buff *skb, unsigned int hooknum, + const struct in_addr *gw, int oif); + +#endif /* _NF_DUP_IPV4_H_ */ diff --git a/include/net/netfilter/ipv6/nf_dup_ipv6.h b/include/net/netfilter/ipv6/nf_dup_ipv6.h new file mode 100644 index 000000000..ed6bd66fa --- /dev/null +++ b/include/net/netfilter/ipv6/nf_dup_ipv6.h @@ -0,0 +1,7 @@ +#ifndef _NF_DUP_IPV6_H_ +#define _NF_DUP_IPV6_H_ + +void nf_dup_ipv6(struct sk_buff *skb, unsigned int hooknum, + const struct in6_addr *gw, int oif); + +#endif /* _NF_DUP_IPV6_H_ */ diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 4023c4ce2..e8ad46834 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -250,8 +250,12 @@ void nf_ct_untracked_status_or(unsigned long bits); void nf_ct_iterate_cleanup(struct net *net, int (*iter)(struct nf_conn *i, void *data), void *data, u32 portid, int report); + +struct nf_conntrack_zone; + void nf_conntrack_free(struct nf_conn *ct); -struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone, +struct nf_conn *nf_conntrack_alloc(struct net *net, + const struct nf_conntrack_zone *zone, const struct nf_conntrack_tuple *orig, const struct nf_conntrack_tuple *repl, gfp_t gfp); @@ -291,7 +295,9 @@ extern unsigned int nf_conntrack_max; extern unsigned int nf_conntrack_hash_rnd; void init_nf_conntrack_hash_rnd(void); -struct nf_conn *nf_ct_tmpl_alloc(struct net *net, u16 zone, gfp_t flags); +struct nf_conn *nf_ct_tmpl_alloc(struct net *net, + const struct nf_conntrack_zone *zone, + gfp_t flags); void nf_ct_tmpl_free(struct nf_conn *tmpl); #define NF_CT_STAT_INC(net, count) __this_cpu_inc((net)->ct.stat->count) diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index f2f0fa3bb..c03f9c42b 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -52,7 +52,8 @@ bool nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, /* Find a connection corresponding to a tuple. */ struct nf_conntrack_tuple_hash * -nf_conntrack_find_get(struct net *net, u16 zone, +nf_conntrack_find_get(struct net *net, + const struct nf_conntrack_zone *zone, const struct nf_conntrack_tuple *tuple); int __nf_conntrack_confirm(struct sk_buff *skb); diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 3f3aecbc8..dce56f09a 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -4,7 +4,9 @@ #ifndef _NF_CONNTRACK_EXPECT_H #define _NF_CONNTRACK_EXPECT_H + #include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_zones.h> extern unsigned int nf_ct_expect_hsize; extern unsigned int nf_ct_expect_max; @@ -76,15 +78,18 @@ int nf_conntrack_expect_init(void); void nf_conntrack_expect_fini(void); struct nf_conntrack_expect * -__nf_ct_expect_find(struct net *net, u16 zone, +__nf_ct_expect_find(struct net *net, + const struct nf_conntrack_zone *zone, const struct nf_conntrack_tuple *tuple); struct nf_conntrack_expect * -nf_ct_expect_find_get(struct net *net, u16 zone, +nf_ct_expect_find_get(struct net *net, + const struct nf_conntrack_zone *zone, const struct nf_conntrack_tuple *tuple); struct nf_conntrack_expect * -nf_ct_find_expectation(struct net *net, u16 zone, +nf_ct_find_expectation(struct net *net, + const struct nf_conntrack_zone *zone, const struct nf_conntrack_tuple *tuple); void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp, diff --git a/include/net/netfilter/nf_conntrack_labels.h b/include/net/netfilter/nf_conntrack_labels.h index dec6336bf..7e2b1d025 100644 --- a/include/net/netfilter/nf_conntrack_labels.h +++ b/include/net/netfilter/nf_conntrack_labels.h @@ -54,7 +54,11 @@ int nf_connlabels_replace(struct nf_conn *ct, #ifdef CONFIG_NF_CONNTRACK_LABELS int nf_conntrack_labels_init(void); void nf_conntrack_labels_fini(void); +int nf_connlabels_get(struct net *net, unsigned int n_bits); +void nf_connlabels_put(struct net *net); #else static inline int nf_conntrack_labels_init(void) { return 0; } static inline void nf_conntrack_labels_fini(void) {} +static inline int nf_connlabels_get(struct net *net, unsigned int n_bits) { return 0; } +static inline void nf_connlabels_put(struct net *net) {} #endif diff --git a/include/net/netfilter/nf_conntrack_zones.h b/include/net/netfilter/nf_conntrack_zones.h index 034efe8d4..4e32512ce 100644 --- a/include/net/netfilter/nf_conntrack_zones.h +++ b/include/net/netfilter/nf_conntrack_zones.h @@ -1,25 +1,89 @@ #ifndef _NF_CONNTRACK_ZONES_H #define _NF_CONNTRACK_ZONES_H -#define NF_CT_DEFAULT_ZONE 0 +#include <linux/netfilter/nf_conntrack_zones_common.h> -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#if IS_ENABLED(CONFIG_NF_CONNTRACK) #include <net/netfilter/nf_conntrack_extend.h> -struct nf_conntrack_zone { - u16 id; -}; +static inline const struct nf_conntrack_zone * +nf_ct_zone(const struct nf_conn *ct) +{ + const struct nf_conntrack_zone *nf_ct_zone = NULL; + +#ifdef CONFIG_NF_CONNTRACK_ZONES + nf_ct_zone = nf_ct_ext_find(ct, NF_CT_EXT_ZONE); +#endif + return nf_ct_zone ? nf_ct_zone : &nf_ct_zone_dflt; +} + +static inline const struct nf_conntrack_zone * +nf_ct_zone_init(struct nf_conntrack_zone *zone, u16 id, u8 dir, u8 flags) +{ + zone->id = id; + zone->flags = flags; + zone->dir = dir; + + return zone; +} + +static inline const struct nf_conntrack_zone * +nf_ct_zone_tmpl(const struct nf_conn *tmpl, const struct sk_buff *skb, + struct nf_conntrack_zone *tmp) +{ + const struct nf_conntrack_zone *zone; + + if (!tmpl) + return &nf_ct_zone_dflt; + + zone = nf_ct_zone(tmpl); + if (zone->flags & NF_CT_FLAG_MARK) + zone = nf_ct_zone_init(tmp, skb->mark, zone->dir, 0); + + return zone; +} -static inline u16 nf_ct_zone(const struct nf_conn *ct) +static inline int nf_ct_zone_add(struct nf_conn *ct, gfp_t flags, + const struct nf_conntrack_zone *info) { #ifdef CONFIG_NF_CONNTRACK_ZONES struct nf_conntrack_zone *nf_ct_zone; - nf_ct_zone = nf_ct_ext_find(ct, NF_CT_EXT_ZONE); - if (nf_ct_zone) - return nf_ct_zone->id; + + nf_ct_zone = nf_ct_ext_add(ct, NF_CT_EXT_ZONE, flags); + if (!nf_ct_zone) + return -ENOMEM; + + nf_ct_zone_init(nf_ct_zone, info->id, info->dir, + info->flags); #endif - return NF_CT_DEFAULT_ZONE; + return 0; } -#endif /* CONFIG_NF_CONNTRACK || CONFIG_NF_CONNTRACK_MODULE */ +static inline bool nf_ct_zone_matches_dir(const struct nf_conntrack_zone *zone, + enum ip_conntrack_dir dir) +{ + return zone->dir & (1 << dir); +} + +static inline u16 nf_ct_zone_id(const struct nf_conntrack_zone *zone, + enum ip_conntrack_dir dir) +{ + return nf_ct_zone_matches_dir(zone, dir) ? + zone->id : NF_CT_DEFAULT_ZONE_ID; +} + +static inline bool nf_ct_zone_equal(const struct nf_conn *a, + const struct nf_conntrack_zone *b, + enum ip_conntrack_dir dir) +{ + return nf_ct_zone_id(nf_ct_zone(a), dir) == + nf_ct_zone_id(b, dir); +} + +static inline bool nf_ct_zone_equal_any(const struct nf_conn *a, + const struct nf_conntrack_zone *b) +{ + return nf_ct_zone(a)->id == b->id; +} +#endif /* IS_ENABLED(CONFIG_NF_CONNTRACK) */ #endif /* _NF_CONNTRACK_ZONES_H */ diff --git a/include/net/netfilter/nft_dup.h b/include/net/netfilter/nft_dup.h new file mode 100644 index 000000000..6b84cf649 --- /dev/null +++ b/include/net/netfilter/nft_dup.h @@ -0,0 +1,9 @@ +#ifndef _NFT_DUP_H_ +#define _NFT_DUP_H_ + +struct nft_dup_inet { + enum nft_registers sreg_addr:8; + enum nft_registers sreg_dev:8; +}; + +#endif /* _NFT_DUP_H_ */ diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 8d93544a2..c0368db6d 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -31,6 +31,7 @@ struct netns_sysctl_ipv6 { int auto_flowlabels; int icmpv6_time; int anycast_src_echo_reply; + int ip_nonlocal_bind; int fwmark_reflect; int idgen_retries; int idgen_delay; diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h index 532e4ba64..38aa4983e 100644 --- a/include/net/netns/netfilter.h +++ b/include/net/netns/netfilter.h @@ -14,5 +14,6 @@ struct netns_nf { #ifdef CONFIG_SYSCTL struct ctl_table_header *nf_log_dir_header; #endif + struct list_head hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; }; #endif diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index 01fc8c531..d0d0f1e53 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -79,6 +79,7 @@ struct nci_ops { int (*close)(struct nci_dev *ndev); int (*send)(struct nci_dev *ndev, struct sk_buff *skb); int (*setup)(struct nci_dev *ndev); + int (*post_setup)(struct nci_dev *ndev); int (*fw_download)(struct nci_dev *ndev, const char *firmware_name); __u32 (*get_rfprotocol)(struct nci_dev *ndev, __u8 rf_protocol); int (*discover_se)(struct nci_dev *ndev); @@ -277,6 +278,8 @@ int nci_request(struct nci_dev *ndev, unsigned long opt), unsigned long opt, __u32 timeout); int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len, __u8 *payload); +int nci_core_reset(struct nci_dev *ndev); +int nci_core_init(struct nci_dev *ndev); int nci_recv_frame(struct nci_dev *ndev, struct sk_buff *skb); int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, __u8 *val); diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index f9e58ae45..30afc9a67 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -203,6 +203,7 @@ struct nfc_dev { int n_vendor_cmds; struct nfc_ops *ops; + struct genl_info *cur_cmd_info; }; #define to_nfc_dev(_dev) container_of(_dev, struct nfc_dev, dev) @@ -318,4 +319,44 @@ static inline int nfc_set_vendor_cmds(struct nfc_dev *dev, return 0; } +struct sk_buff *__nfc_alloc_vendor_cmd_reply_skb(struct nfc_dev *dev, + enum nfc_attrs attr, + u32 oui, u32 subcmd, + int approxlen); +int nfc_vendor_cmd_reply(struct sk_buff *skb); + +/** + * nfc_vendor_cmd_alloc_reply_skb - allocate vendor command reply + * @dev: nfc device + * @oui: vendor oui + * @approxlen: an upper bound of the length of the data that will + * be put into the skb + * + * This function allocates and pre-fills an skb for a reply to + * a vendor command. Since it is intended for a reply, calling + * it outside of a vendor command's doit() operation is invalid. + * + * The returned skb is pre-filled with some identifying data in + * a way that any data that is put into the skb (with skb_put(), + * nla_put() or similar) will end up being within the + * %NFC_ATTR_VENDOR_DATA attribute, so all that needs to be done + * with the skb is adding data for the corresponding userspace tool + * which can then read that data out of the vendor data attribute. + * You must not modify the skb in any other way. + * + * When done, call nfc_vendor_cmd_reply() with the skb and return + * its error code as the result of the doit() operation. + * + * Return: An allocated and pre-filled skb. %NULL if any errors happen. + */ +static inline struct sk_buff * +nfc_vendor_cmd_alloc_reply_skb(struct nfc_dev *dev, + u32 oui, u32 subcmd, int approxlen) +{ + return __nfc_alloc_vendor_cmd_reply_skb(dev, + NFC_ATTR_VENDOR_DATA, + oui, + subcmd, approxlen); +} + #endif /* __NET_NFC_H */ diff --git a/include/net/nl802154.h b/include/net/nl802154.h index b0ab530d2..cf2713d8b 100644 --- a/include/net/nl802154.h +++ b/include/net/nl802154.h @@ -52,6 +52,8 @@ enum nl802154_commands { NL802154_CMD_SET_LBT_MODE, + NL802154_CMD_SET_ACKREQ_DEFAULT, + /* add new commands above here */ /* used to define NL802154_CMD_MAX below */ @@ -104,6 +106,8 @@ enum nl802154_attrs { NL802154_ATTR_SUPPORTED_COMMANDS, + NL802154_ATTR_ACKREQ_DEFAULT, + /* add attributes here, update the policy in nl802154.c */ __NL802154_ATTR_AFTER_LAST, diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 2342bf12c..401038d2f 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -110,10 +110,8 @@ static inline void qdisc_run(struct Qdisc *q) __qdisc_run(q); } -int tc_classify_compat(struct sk_buff *skb, const struct tcf_proto *tp, - struct tcf_result *res); int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp, - struct tcf_result *res); + struct tcf_result *res, bool compat_mode); static inline __be16 tc_skb_protocol(const struct sk_buff *skb) { diff --git a/include/net/route.h b/include/net/route.h index fe22d03af..f46af2568 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -188,8 +188,12 @@ void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk); void ip_rt_send_redirect(struct sk_buff *skb); unsigned int inet_addr_type(struct net *net, __be32 addr); +unsigned int inet_addr_type_table(struct net *net, __be32 addr, u32 tb_id); unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev, __be32 addr); +unsigned int inet_addr_type_dev_table(struct net *net, + const struct net_device *dev, + __be32 addr); void ip_rt_multicast_event(struct in_device *); int ip_rt_ioctl(struct net *, unsigned int cmd, void __user *arg); void ip_rt_get_source(u8 *src, struct sk_buff *skb, struct rtable *rt); @@ -250,6 +254,9 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32 if (inet_sk(sk)->transparent) flow_flags |= FLOWI_FLAG_ANYSRC; + if (netif_index_is_vrf(sock_net(sk), oif)) + flow_flags |= FLOWI_FLAG_VRFSRC | FLOWI_FLAG_SKIP_NH_OIF; + flowi4_init_output(fl4, oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, protocol, flow_flags, dst, src, dport, sport); } diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 343d922d1..18fdb9818 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -141,6 +141,7 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname, unsigned char name_assign_type, const struct rtnl_link_ops *ops, struct nlattr *tb[]); +int rtnl_delete_link(struct net_device *dev); int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm); int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len); diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 2738f6f87..444faa89a 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -340,6 +340,7 @@ extern struct Qdisc noop_qdisc; extern struct Qdisc_ops noop_qdisc_ops; extern struct Qdisc_ops pfifo_fast_ops; extern struct Qdisc_ops mq_qdisc_ops; +extern struct Qdisc_ops noqueue_qdisc_ops; extern const struct Qdisc_ops *default_qdisc_ops; struct Qdisc_class_common { @@ -513,17 +514,20 @@ static inline void bstats_update(struct gnet_stats_basic_packed *bstats, bstats->packets += skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1; } -static inline void qdisc_bstats_update_cpu(struct Qdisc *sch, - const struct sk_buff *skb) +static inline void bstats_cpu_update(struct gnet_stats_basic_cpu *bstats, + const struct sk_buff *skb) { - struct gnet_stats_basic_cpu *bstats = - this_cpu_ptr(sch->cpu_bstats); - u64_stats_update_begin(&bstats->syncp); bstats_update(&bstats->bstats, skb); u64_stats_update_end(&bstats->syncp); } +static inline void qdisc_bstats_cpu_update(struct Qdisc *sch, + const struct sk_buff *skb) +{ + bstats_cpu_update(this_cpu_ptr(sch->cpu_bstats), skb); +} + static inline void qdisc_bstats_update(struct Qdisc *sch, const struct sk_buff *skb) { @@ -547,16 +551,24 @@ static inline void __qdisc_qstats_drop(struct Qdisc *sch, int count) sch->qstats.drops += count; } -static inline void qdisc_qstats_drop(struct Qdisc *sch) +static inline void qstats_drop_inc(struct gnet_stats_queue *qstats) { - sch->qstats.drops++; + qstats->drops++; } -static inline void qdisc_qstats_drop_cpu(struct Qdisc *sch) +static inline void qstats_overlimit_inc(struct gnet_stats_queue *qstats) { - struct gnet_stats_queue *qstats = this_cpu_ptr(sch->cpu_qstats); + qstats->overlimits++; +} - qstats->drops++; +static inline void qdisc_qstats_drop(struct Qdisc *sch) +{ + qstats_drop_inc(&sch->qstats); +} + +static inline void qdisc_qstats_cpu_drop(struct Qdisc *sch) +{ + qstats_drop_inc(this_cpu_ptr(sch->cpu_qstats)); } static inline void qdisc_qstats_overlimit(struct Qdisc *sch) diff --git a/include/net/sock.h b/include/net/sock.h index 4ca4c3fe4..e23717013 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -429,7 +429,9 @@ struct sock { void *sk_security; #endif __u32 sk_mark; +#ifdef CONFIG_CGROUP_NET_CLASSID u32 sk_classid; +#endif struct cg_proto *sk_cgrp; void (*sk_state_change)(struct sock *sk); void (*sk_data_ready)(struct sock *sk); @@ -1048,42 +1050,9 @@ struct proto { #endif }; -/* - * Bits in struct cg_proto.flags - */ -enum cg_proto_flags { - /* Currently active and new sockets should be assigned to cgroups */ - MEMCG_SOCK_ACTIVE, - /* It was ever activated; we must disarm static keys on destruction */ - MEMCG_SOCK_ACTIVATED, -}; - -struct cg_proto { - struct page_counter memory_allocated; /* Current allocated memory. */ - struct percpu_counter sockets_allocated; /* Current number of sockets. */ - int memory_pressure; - long sysctl_mem[3]; - unsigned long flags; - /* - * memcg field is used to find which memcg we belong directly - * Each memcg struct can hold more than one cg_proto, so container_of - * won't really cut. - * - * The elegant solution would be having an inverse function to - * proto_cgroup in struct proto, but that means polluting the structure - * for everybody, instead of just for memcg users. - */ - struct mem_cgroup *memcg; -}; - int proto_register(struct proto *prot, int alloc_slab); void proto_unregister(struct proto *prot); -static inline bool memcg_proto_active(struct cg_proto *cg_proto) -{ - return test_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags); -} - #ifdef SOCK_REFCNT_DEBUG static inline void sk_refcnt_debug_inc(struct sock *sk) { @@ -1693,6 +1662,20 @@ static inline void sock_graft(struct sock *sk, struct socket *parent) kuid_t sock_i_uid(struct sock *sk); unsigned long sock_i_ino(struct sock *sk); +static inline void sk_set_txhash(struct sock *sk) +{ + sk->sk_txhash = prandom_u32(); + + if (unlikely(!sk->sk_txhash)) + sk->sk_txhash = 1; +} + +static inline void sk_rethink_txhash(struct sock *sk) +{ + if (sk->sk_txhash) + sk_set_txhash(sk); +} + static inline struct dst_entry * __sk_dst_get(struct sock *sk) { @@ -1717,6 +1700,8 @@ static inline void dst_negative_advice(struct sock *sk) { struct dst_entry *ndst, *dst = __sk_dst_get(sk); + sk_rethink_txhash(sk); + if (dst && dst->ops->negative_advice) { ndst = dst->ops->negative_advice(dst); diff --git a/include/net/switchdev.h b/include/net/switchdev.h index d5671f118..319baab3b 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -72,6 +72,7 @@ struct switchdev_obj { struct switchdev_obj_fdb { /* PORT_FDB */ const unsigned char *addr; u16 vid; + u16 ndm_state; } fdb; } u; }; @@ -157,6 +158,9 @@ int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, struct net_device *filter_dev, int idx); +void switchdev_port_fwd_mark_set(struct net_device *dev, + struct net_device *group_dev, + bool joining); #else @@ -271,6 +275,12 @@ static inline int switchdev_port_fdb_dump(struct sk_buff *skb, return -EOPNOTSUPP; } +static inline void switchdev_port_fwd_mark_set(struct net_device *dev, + struct net_device *group_dev, + bool joining) +{ +} + #endif #endif /* _LINUX_SWITCHDEV_H_ */ diff --git a/include/net/tc_act/tc_bpf.h b/include/net/tc_act/tc_bpf.h index a152e9858..958d69cfb 100644 --- a/include/net/tc_act/tc_bpf.h +++ b/include/net/tc_act/tc_bpf.h @@ -15,7 +15,7 @@ struct tcf_bpf { struct tcf_common common; - struct bpf_prog *filter; + struct bpf_prog __rcu *filter; union { u32 bpf_fd; u16 bpf_num_ops; diff --git a/include/net/tc_act/tc_gact.h b/include/net/tc_act/tc_gact.h index 9fc9b5789..592a6bc02 100644 --- a/include/net/tc_act/tc_gact.h +++ b/include/net/tc_act/tc_gact.h @@ -6,9 +6,10 @@ struct tcf_gact { struct tcf_common common; #ifdef CONFIG_GACT_PROB - u16 tcfg_ptype; - u16 tcfg_pval; - int tcfg_paction; + u16 tcfg_ptype; + u16 tcfg_pval; + int tcfg_paction; + atomic_t packets; #endif }; #define to_gact(a) \ diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h index 4dd77a1c1..dae96bae1 100644 --- a/include/net/tc_act/tc_mirred.h +++ b/include/net/tc_act/tc_mirred.h @@ -8,7 +8,7 @@ struct tcf_mirred { int tcfm_eaction; int tcfm_ifindex; int tcfm_ok_push; - struct net_device *tcfm_dev; + struct net_device __rcu *tcfm_dev; struct list_head tcfm_list; }; #define to_mirred(a) \ diff --git a/include/net/tcp.h b/include/net/tcp.h index 9b73d7ee5..cfec17eb0 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -281,6 +281,8 @@ extern unsigned int sysctl_tcp_notsent_lowat; extern int sysctl_tcp_min_tso_segs; extern int sysctl_tcp_autocorking; extern int sysctl_tcp_invalid_ratelimit; +extern int sysctl_tcp_pacing_ss_ratio; +extern int sysctl_tcp_pacing_ca_ratio; extern atomic_long_t tcp_memory_allocated; extern struct percpu_counter tcp_sockets_allocated; @@ -892,7 +894,7 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked); extern struct tcp_congestion_ops tcp_reno; struct tcp_congestion_ops *tcp_ca_find_key(u32 key); -u32 tcp_ca_get_key_by_name(const char *name); +u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca); #ifdef CONFIG_INET char *tcp_ca_get_name_by_key(u32 key, char *buffer); #else @@ -995,6 +997,11 @@ static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp) #define TCP_INFINITE_SSTHRESH 0x7fffffff +static inline bool tcp_in_slow_start(const struct tcp_sock *tp) +{ + return tp->snd_cwnd < tp->snd_ssthresh; +} + static inline bool tcp_in_initial_slowstart(const struct tcp_sock *tp) { return tp->snd_ssthresh >= TCP_INFINITE_SSTHRESH; @@ -1071,7 +1078,7 @@ static inline bool tcp_is_cwnd_limited(const struct sock *sk) const struct tcp_sock *tp = tcp_sk(sk); /* If in slow start, ensure cwnd grows to twice what was ACKed. */ - if (tp->snd_cwnd <= tp->snd_ssthresh) + if (tcp_in_slow_start(tp)) return tp->snd_cwnd < 2 * tp->max_packets_out; return tp->is_cwnd_limited; @@ -1166,6 +1173,19 @@ static inline void tcp_sack_reset(struct tcp_options_received *rx_opt) } u32 tcp_default_init_rwnd(u32 mss); +void tcp_cwnd_restart(struct sock *sk, s32 delta); + +static inline void tcp_slow_start_after_idle_check(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + s32 delta; + + if (!sysctl_tcp_slow_start_after_idle || tp->packets_out) + return; + delta = tcp_time_stamp - tp->lsndtime; + if (delta > inet_csk(sk)->icsk_rto) + tcp_cwnd_restart(sk, delta); +} /* Determine a window scaling and initial window to offer. */ void tcp_select_initial_window(int __space, __u32 mss, __u32 *rcv_wnd, diff --git a/include/net/timewait_sock.h b/include/net/timewait_sock.h index 68f0ecad6..1a47946f9 100644 --- a/include/net/timewait_sock.h +++ b/include/net/timewait_sock.h @@ -33,9 +33,6 @@ static inline int twsk_unique(struct sock *sk, struct sock *sktw, void *twp) static inline void twsk_destructor(struct sock *sk) { - BUG_ON(sk == NULL); - BUG_ON(sk->sk_prot == NULL); - BUG_ON(sk->sk_prot->twsk_prot == NULL); if (sk->sk_prot->twsk_prot->twsk_destructor != NULL) sk->sk_prot->twsk_prot->twsk_destructor(sk); } diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index c491c1221..cb2f89f20 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -31,7 +31,8 @@ struct udp_port_cfg { __be16 peer_udp_port; unsigned int use_udp_checksums:1, use_udp6_tx_checksums:1, - use_udp6_rx_checksums:1; + use_udp6_rx_checksums:1, + ipv6_v6only:1; }; int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg, @@ -93,6 +94,10 @@ int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk, void udp_tunnel_sock_release(struct socket *sock); +struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family, + __be16 flags, __be64 tunnel_id, + int md_size); + static inline struct sk_buff *udp_tunnel_handle_offloads(struct sk_buff *skb, bool udp_csum) { diff --git a/include/net/vrf.h b/include/net/vrf.h new file mode 100644 index 000000000..593e6094d --- /dev/null +++ b/include/net/vrf.h @@ -0,0 +1,178 @@ +/* + * include/net/net_vrf.h - adds vrf dev structure definitions + * Copyright (c) 2015 Cumulus Networks + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef __LINUX_NET_VRF_H +#define __LINUX_NET_VRF_H + +struct net_vrf_dev { + struct rcu_head rcu; + int ifindex; /* ifindex of master dev */ + u32 tb_id; /* table id for VRF */ +}; + +struct slave { + struct list_head list; + struct net_device *dev; +}; + +struct slave_queue { + struct list_head all_slaves; +}; + +struct net_vrf { + struct slave_queue queue; + struct rtable *rth; + u32 tb_id; +}; + + +#if IS_ENABLED(CONFIG_NET_VRF) +/* called with rcu_read_lock() */ +static inline int vrf_master_ifindex_rcu(const struct net_device *dev) +{ + struct net_vrf_dev *vrf_ptr; + int ifindex = 0; + + if (!dev) + return 0; + + if (netif_is_vrf(dev)) { + ifindex = dev->ifindex; + } else { + vrf_ptr = rcu_dereference(dev->vrf_ptr); + if (vrf_ptr) + ifindex = vrf_ptr->ifindex; + } + + return ifindex; +} + +static inline int vrf_master_ifindex(const struct net_device *dev) +{ + int ifindex; + + rcu_read_lock(); + ifindex = vrf_master_ifindex_rcu(dev); + rcu_read_unlock(); + + return ifindex; +} + +/* called with rcu_read_lock */ +static inline u32 vrf_dev_table_rcu(const struct net_device *dev) +{ + u32 tb_id = 0; + + if (dev) { + struct net_vrf_dev *vrf_ptr; + + vrf_ptr = rcu_dereference(dev->vrf_ptr); + if (vrf_ptr) + tb_id = vrf_ptr->tb_id; + } + return tb_id; +} + +static inline u32 vrf_dev_table(const struct net_device *dev) +{ + u32 tb_id; + + rcu_read_lock(); + tb_id = vrf_dev_table_rcu(dev); + rcu_read_unlock(); + + return tb_id; +} + +static inline u32 vrf_dev_table_ifindex(struct net *net, int ifindex) +{ + struct net_device *dev; + u32 tb_id = 0; + + if (!ifindex) + return 0; + + rcu_read_lock(); + + dev = dev_get_by_index_rcu(net, ifindex); + if (dev) + tb_id = vrf_dev_table_rcu(dev); + + rcu_read_unlock(); + + return tb_id; +} + +/* called with rtnl */ +static inline u32 vrf_dev_table_rtnl(const struct net_device *dev) +{ + u32 tb_id = 0; + + if (dev) { + struct net_vrf_dev *vrf_ptr; + + vrf_ptr = rtnl_dereference(dev->vrf_ptr); + if (vrf_ptr) + tb_id = vrf_ptr->tb_id; + } + return tb_id; +} + +/* caller has already checked netif_is_vrf(dev) */ +static inline struct rtable *vrf_dev_get_rth(const struct net_device *dev) +{ + struct rtable *rth = ERR_PTR(-ENETUNREACH); + struct net_vrf *vrf = netdev_priv(dev); + + if (vrf) { + rth = vrf->rth; + atomic_inc(&rth->dst.__refcnt); + } + return rth; +} + +#else +static inline int vrf_master_ifindex_rcu(const struct net_device *dev) +{ + return 0; +} + +static inline int vrf_master_ifindex(const struct net_device *dev) +{ + return 0; +} + +static inline u32 vrf_dev_table_rcu(const struct net_device *dev) +{ + return 0; +} + +static inline u32 vrf_dev_table(const struct net_device *dev) +{ + return 0; +} + +static inline u32 vrf_dev_table_ifindex(struct net *net, int ifindex) +{ + return 0; +} + +static inline u32 vrf_dev_table_rtnl(const struct net_device *dev) +{ + return 0; +} + +static inline struct rtable *vrf_dev_get_rth(const struct net_device *dev) +{ + return ERR_PTR(-ENETUNREACH); +} +#endif + +#endif /* __LINUX_NET_VRF_H */ diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 0082b5d33..480a319b4 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -7,6 +7,7 @@ #include <linux/skbuff.h> #include <linux/netdevice.h> #include <linux/udp.h> +#include <net/dst_metadata.h> #define VNI_HASH_BITS 10 #define VNI_HASH_SIZE (1<<VNI_HASH_BITS) @@ -94,20 +95,18 @@ struct vxlanhdr { #define VXLAN_VNI_MASK (VXLAN_VID_MASK << 8) #define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr)) +#define VNI_HASH_BITS 10 +#define VNI_HASH_SIZE (1<<VNI_HASH_BITS) +#define FDB_HASH_BITS 8 +#define FDB_HASH_SIZE (1<<FDB_HASH_BITS) + struct vxlan_metadata { - __be32 vni; u32 gbp; }; -struct vxlan_sock; -typedef void (vxlan_rcv_t)(struct vxlan_sock *vh, struct sk_buff *skb, - struct vxlan_metadata *md); - /* per UDP socket information */ struct vxlan_sock { struct hlist_node hlist; - vxlan_rcv_t *rcv; - void *data; struct work_struct del_work; struct socket *sock; struct rcu_head rcu; @@ -117,6 +116,58 @@ struct vxlan_sock { u32 flags; }; +union vxlan_addr { + struct sockaddr_in sin; + struct sockaddr_in6 sin6; + struct sockaddr sa; +}; + +struct vxlan_rdst { + union vxlan_addr remote_ip; + __be16 remote_port; + u32 remote_vni; + u32 remote_ifindex; + struct list_head list; + struct rcu_head rcu; +}; + +struct vxlan_config { + union vxlan_addr remote_ip; + union vxlan_addr saddr; + u32 vni; + int remote_ifindex; + int mtu; + __be16 dst_port; + __u16 port_min; + __u16 port_max; + __u8 tos; + __u8 ttl; + u32 flags; + unsigned long age_interval; + unsigned int addrmax; + bool no_share; +}; + +/* Pseudo network device */ +struct vxlan_dev { + struct hlist_node hlist; /* vni hash table */ + struct list_head next; /* vxlan's per namespace list */ + struct vxlan_sock *vn_sock; /* listening socket */ + struct net_device *dev; + struct net *net; /* netns for packet i/o */ + struct vxlan_rdst default_dst; /* default destination */ + u32 flags; /* VXLAN_F_* in vxlan.h */ + + struct timer_list age_timer; + spinlock_t hash_lock; + unsigned int addrcnt; + struct gro_cells gro_cells; + + struct vxlan_config cfg; + + struct hlist_head fdb_head[FDB_HASH_SIZE]; +}; + #define VXLAN_F_LEARN 0x01 #define VXLAN_F_PROXY 0x02 #define VXLAN_F_RSC 0x04 @@ -130,6 +181,7 @@ struct vxlan_sock { #define VXLAN_F_REMCSUM_RX 0x400 #define VXLAN_F_GBP 0x800 #define VXLAN_F_REMCSUM_NOPARTIAL 0x1000 +#define VXLAN_F_COLLECT_METADATA 0x2000 /* Flags that are used in the receive path. These flags must match in * order for a socket to be shareable @@ -137,18 +189,16 @@ struct vxlan_sock { #define VXLAN_F_RCV_FLAGS (VXLAN_F_GBP | \ VXLAN_F_UDP_ZERO_CSUM6_RX | \ VXLAN_F_REMCSUM_RX | \ - VXLAN_F_REMCSUM_NOPARTIAL) - -struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, - vxlan_rcv_t *rcv, void *data, - bool no_share, u32 flags); + VXLAN_F_REMCSUM_NOPARTIAL | \ + VXLAN_F_COLLECT_METADATA) -void vxlan_sock_release(struct vxlan_sock *vs); +struct net_device *vxlan_dev_create(struct net *net, const char *name, + u8 name_assign_type, struct vxlan_config *conf); -int vxlan_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, - __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, - __be16 src_port, __be16 dst_port, struct vxlan_metadata *md, - bool xnet, u32 vxflags); +static inline __be16 vxlan_dev_dst_port(struct vxlan_dev *vxlan) +{ + return inet_sk(vxlan->vn_sock->sock->sk)->inet_sport; +} static inline netdev_features_t vxlan_features_check(struct sk_buff *skb, netdev_features_t features) @@ -191,4 +241,10 @@ static inline void vxlan_get_rx_port(struct net_device *netdev) { } #endif + +static inline unsigned short vxlan_get_sk_family(struct vxlan_sock *vs) +{ + return vs->sock->sk->sk_family; +} + #endif diff --git a/include/net/xfrm.h b/include/net/xfrm.h index f0ee97eec..312e3fee9 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -285,10 +285,13 @@ struct xfrm_policy_afinfo { unsigned short family; struct dst_ops *dst_ops; void (*garbage_collect)(struct net *net); - struct dst_entry *(*dst_lookup)(struct net *net, int tos, + struct dst_entry *(*dst_lookup)(struct net *net, + int tos, int oif, const xfrm_address_t *saddr, const xfrm_address_t *daddr); - int (*get_saddr)(struct net *net, xfrm_address_t *saddr, xfrm_address_t *daddr); + int (*get_saddr)(struct net *net, int oif, + xfrm_address_t *saddr, + xfrm_address_t *daddr); void (*decode_session)(struct sk_buff *skb, struct flowi *fl, int reverse); |