Diffstat (limited to 'net/core')
-rw-r--r--  net/core/Makefile          |   1
-rw-r--r--  net/core/dev.c             |  90
-rw-r--r--  net/core/dst.c             | 112
-rw-r--r--  net/core/fib_rules.c       |  34
-rw-r--r--  net/core/filter.c          | 157
-rw-r--r--  net/core/flow_dissector.c  | 284
-rw-r--r--  net/core/lwtunnel.c        | 249
-rw-r--r--  net/core/neighbour.c       |  14
-rw-r--r--  net/core/net-sysfs.c       |  43
-rw-r--r--  net/core/net-traces.c      |   1
-rw-r--r--  net/core/netpoll.c         |  12
-rw-r--r--  net/core/pktgen.c          |   5
-rw-r--r--  net/core/rtnetlink.c       |  48
-rw-r--r--  net/core/skbuff.c          |   4
-rw-r--r--  net/core/sock.c            |  14
-rw-r--r--  net/core/timestamping.c    |   6
-rw-r--r--  net/core/utils.c           |  17
17 files changed, 940 insertions, 151 deletions
diff --git a/net/core/Makefile b/net/core/Makefile
index fec0856dd..086b01fbe 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -23,3 +23,4 @@ obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += timestamping.o
obj-$(CONFIG_NET_PTP_CLASSIFY) += ptp_classifier.o
obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o
obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o
+obj-$(CONFIG_LWTUNNEL) += lwtunnel.o
diff --git a/net/core/dev.c b/net/core/dev.c
index a8e4dd430..c14748d05 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -99,6 +99,7 @@
#include <linux/rtnetlink.h>
#include <linux/stat.h>
#include <net/dst.h>
+#include <net/dst_metadata.h>
#include <net/pkt_sched.h>
#include <net/checksum.h>
#include <net/xfrm.h>
@@ -682,6 +683,32 @@ int dev_get_iflink(const struct net_device *dev)
EXPORT_SYMBOL(dev_get_iflink);
/**
+ * dev_fill_metadata_dst - Retrieve tunnel egress information.
+ * @dev: targeted interface
+ * @skb: The packet.
+ *
+ * For better visibility of tunnel traffic OVS needs to retrieve
+ * egress tunnel information for a packet. Following API allows
+ * user to get this info.
+ */
+int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
+{
+ struct ip_tunnel_info *info;
+
+ if (!dev->netdev_ops || !dev->netdev_ops->ndo_fill_metadata_dst)
+ return -EINVAL;
+
+ info = skb_tunnel_info_unclone(skb);
+ if (!info)
+ return -ENOMEM;
+ if (unlikely(!(info->mode & IP_TUNNEL_INFO_TX)))
+ return -EINVAL;
+
+ return dev->netdev_ops->ndo_fill_metadata_dst(dev, skb);
+}
+EXPORT_SYMBOL_GPL(dev_fill_metadata_dst);
+
+/**
* __dev_get_by_name - find a device by its name
* @net: the applicable net namespace
* @name: name to find
@@ -3061,6 +3088,16 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
else
skb_dst_force(skb);
+#ifdef CONFIG_NET_SWITCHDEV
+ /* Don't forward if offload device already forwarded */
+ if (skb->offload_fwd_mark &&
+ skb->offload_fwd_mark == dev->offload_fwd_mark) {
+ consume_skb(skb);
+ rc = NET_XMIT_SUCCESS;
+ goto out;
+ }
+#endif
+
txq = netdev_pick_tx(dev, skb, accel_priv);
q = rcu_dereference_bh(txq->qdisc);
@@ -3645,15 +3682,15 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
qdisc_skb_cb(skb)->pkt_len = skb->len;
skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
- qdisc_bstats_update_cpu(cl->q, skb);
+ qdisc_bstats_cpu_update(cl->q, skb);
- switch (tc_classify(skb, cl, &cl_res)) {
+ switch (tc_classify(skb, cl, &cl_res, false)) {
case TC_ACT_OK:
case TC_ACT_RECLASSIFY:
skb->tc_index = TC_H_MIN(cl_res.classid);
break;
case TC_ACT_SHOT:
- qdisc_qstats_drop_cpu(cl->q);
+ qdisc_qstats_cpu_drop(cl->q);
case TC_ACT_STOLEN:
case TC_ACT_QUEUED:
kfree_skb(skb);
@@ -4703,6 +4740,8 @@ void napi_disable(struct napi_struct *n)
while (test_and_set_bit(NAPI_STATE_SCHED, &n->state))
msleep(1);
+ while (test_and_set_bit(NAPI_STATE_NPSVC, &n->state))
+ msleep(1);
hrtimer_cancel(&n->timer);
@@ -4985,7 +5024,7 @@ EXPORT_SYMBOL(netdev_all_upper_get_next_dev_rcu);
* Gets the next netdev_adjacent->private from the dev's lower neighbour
* list, starting from iter position. The caller must hold either hold the
* RTNL lock or its own locking that guarantees that the neighbour lower
- * list will remain unchainged.
+ * list will remain unchanged.
*/
void *netdev_lower_get_next_private(struct net_device *dev,
struct list_head **iter)
@@ -5040,7 +5079,7 @@ EXPORT_SYMBOL(netdev_lower_get_next_private_rcu);
* Gets the next netdev_adjacent from the dev's lower neighbour
* list, starting from iter position. The caller must hold RTNL lock or
* its own locking that guarantees that the neighbour lower
- * list will remain unchainged.
+ * list will remain unchanged.
*/
void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter)
{
@@ -5301,6 +5340,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
struct net_device *upper_dev, bool master,
void *private)
{
+ struct netdev_notifier_changeupper_info changeupper_info;
struct netdev_adjacent *i, *j, *to_i, *to_j;
int ret = 0;
@@ -5319,6 +5359,10 @@ static int __netdev_upper_dev_link(struct net_device *dev,
if (master && netdev_master_upper_dev_get(dev))
return -EBUSY;
+ changeupper_info.upper_dev = upper_dev;
+ changeupper_info.master = master;
+ changeupper_info.linking = true;
+
ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, private,
master);
if (ret)
@@ -5357,7 +5401,8 @@ static int __netdev_upper_dev_link(struct net_device *dev,
goto rollback_lower_mesh;
}
- call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev);
+ call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
+ &changeupper_info.info);
return 0;
rollback_lower_mesh:
@@ -5452,9 +5497,14 @@ EXPORT_SYMBOL(netdev_master_upper_dev_link_private);
void netdev_upper_dev_unlink(struct net_device *dev,
struct net_device *upper_dev)
{
+ struct netdev_notifier_changeupper_info changeupper_info;
struct netdev_adjacent *i, *j;
ASSERT_RTNL();
+ changeupper_info.upper_dev = upper_dev;
+ changeupper_info.master = netdev_master_upper_dev_get(dev) == upper_dev;
+ changeupper_info.linking = false;
+
__netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
/* Here is the tricky part. We must remove all dev's lower
@@ -5474,7 +5524,8 @@ void netdev_upper_dev_unlink(struct net_device *dev,
list_for_each_entry(i, &upper_dev->all_adj_list.upper, list)
__netdev_adjacent_dev_unlink(dev, i->dev);
- call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev);
+ call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
+ &changeupper_info.info);
}
EXPORT_SYMBOL(netdev_upper_dev_unlink);
@@ -6075,6 +6126,26 @@ int dev_get_phys_port_name(struct net_device *dev,
EXPORT_SYMBOL(dev_get_phys_port_name);
/**
+ * dev_change_proto_down - update protocol port state information
+ * @dev: device
+ * @proto_down: new value
+ *
+ * This info can be used by switch drivers to set the phys state of the
+ * port.
+ */
+int dev_change_proto_down(struct net_device *dev, bool proto_down)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+
+ if (!ops->ndo_change_proto_down)
+ return -EOPNOTSUPP;
+ if (!netif_device_present(dev))
+ return -ENODEV;
+ return ops->ndo_change_proto_down(dev, proto_down);
+}
+EXPORT_SYMBOL(dev_change_proto_down);
+
+/**
* dev_new_index - allocate an ifindex
* @net: the applicable net namespace
*
@@ -6967,6 +7038,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
setup(dev);
+ if (!dev->tx_queue_len)
+ dev->priv_flags |= IFF_NO_QUEUE;
+
dev->num_tx_queues = txqs;
dev->real_num_tx_queues = txqs;
if (netif_alloc_netdev_queues(dev))
@@ -7639,7 +7713,7 @@ static int __init net_dev_init(void)
open_softirq(NET_RX_SOFTIRQ, net_rx_action);
hotcpu_notifier(dev_cpu_callback, 0);
- dst_init();
+ dst_subsys_init();
rc = 0;
out:
return rc;
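
The dev.c hunks above add two driver-facing hooks: ndo_fill_metadata_dst, which the new dev_fill_metadata_dst() calls so OVS can query the egress tunnel key of a packet, and ndo_change_proto_down behind dev_change_proto_down(). As a rough illustration only — the foo_* names and the placeholder address are invented, not part of this patch — a tunnel driver could wire them up like this:

static int foo_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
{
	struct ip_tunnel_info *info = skb_tunnel_info(skb);

	/* report the egress key this device would actually transmit with;
	 * the destination below is only a placeholder
	 */
	info->key.u.ipv4.dst = htonl(0xc0a80001);
	return 0;
}

static int foo_change_proto_down(struct net_device *dev, bool proto_down)
{
	/* a switch driver would also propagate this to the phys port */
	dev->proto_down = proto_down;
	return 0;
}

static const struct net_device_ops foo_netdev_ops = {
	.ndo_fill_metadata_dst	= foo_fill_metadata_dst,
	.ndo_change_proto_down	= foo_change_proto_down,
};
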
diff --git a/net/core/dst.c b/net/core/dst.c
index 002144bea..d6a5a0bc7 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -20,8 +20,10 @@
#include <net/net_namespace.h>
#include <linux/sched.h>
#include <linux/prefetch.h>
+#include <net/lwtunnel.h>
#include <net/dst.h>
+#include <net/dst_metadata.h>
/*
* Theory of operations:
@@ -158,19 +160,10 @@ const u32 dst_default_metrics[RTAX_MAX + 1] = {
[RTAX_MAX] = 0xdeadbeef,
};
-
-void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
- int initial_ref, int initial_obsolete, unsigned short flags)
+void dst_init(struct dst_entry *dst, struct dst_ops *ops,
+ struct net_device *dev, int initial_ref, int initial_obsolete,
+ unsigned short flags)
{
- struct dst_entry *dst;
-
- if (ops->gc && dst_entries_get_fast(ops) > ops->gc_thresh) {
- if (ops->gc(ops))
- return NULL;
- }
- dst = kmem_cache_alloc(ops->kmem_cachep, GFP_ATOMIC);
- if (!dst)
- return NULL;
dst->child = NULL;
dst->dev = dev;
if (dev)
@@ -192,6 +185,7 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
#ifdef CONFIG_IP_ROUTE_CLASSID
dst->tclassid = 0;
#endif
+ dst->lwtstate = NULL;
atomic_set(&dst->__refcnt, initial_ref);
dst->__use = 0;
dst->lastuse = jiffies;
@@ -200,6 +194,25 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
dst->next = NULL;
if (!(flags & DST_NOCOUNT))
dst_entries_add(ops, 1);
+}
+EXPORT_SYMBOL(dst_init);
+
+void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
+ int initial_ref, int initial_obsolete, unsigned short flags)
+{
+ struct dst_entry *dst;
+
+ if (ops->gc && dst_entries_get_fast(ops) > ops->gc_thresh) {
+ if (ops->gc(ops))
+ return NULL;
+ }
+
+ dst = kmem_cache_alloc(ops->kmem_cachep, GFP_ATOMIC);
+ if (!dst)
+ return NULL;
+
+ dst_init(dst, ops, dev, initial_ref, initial_obsolete, flags);
+
return dst;
}
EXPORT_SYMBOL(dst_alloc);
@@ -248,7 +261,13 @@ again:
dst->ops->destroy(dst);
if (dst->dev)
dev_put(dst->dev);
- kmem_cache_free(dst->ops->kmem_cachep, dst);
+
+ lwtstate_put(dst->lwtstate);
+
+ if (dst->flags & DST_METADATA)
+ kfree(dst);
+ else
+ kmem_cache_free(dst->ops->kmem_cachep, dst);
dst = child;
if (dst) {
@@ -287,7 +306,7 @@ void dst_release(struct dst_entry *dst)
if (unlikely(newrefcnt < 0))
net_warn_ratelimited("%s: dst:%p refcnt:%d\n",
__func__, dst, newrefcnt);
- if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt)
+ if (!newrefcnt && unlikely(dst->flags & DST_NOCACHE))
call_rcu(&dst->rcu_head, dst_destroy_rcu);
}
}
@@ -329,6 +348,69 @@ void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old)
}
EXPORT_SYMBOL(__dst_destroy_metrics_generic);
+static struct dst_ops md_dst_ops = {
+ .family = AF_UNSPEC,
+};
+
+static int dst_md_discard_sk(struct sock *sk, struct sk_buff *skb)
+{
+ WARN_ONCE(1, "Attempting to call output on metadata dst\n");
+ kfree_skb(skb);
+ return 0;
+}
+
+static int dst_md_discard(struct sk_buff *skb)
+{
+ WARN_ONCE(1, "Attempting to call input on metadata dst\n");
+ kfree_skb(skb);
+ return 0;
+}
+
+static void __metadata_dst_init(struct metadata_dst *md_dst, u8 optslen)
+{
+ struct dst_entry *dst;
+
+ dst = &md_dst->dst;
+ dst_init(dst, &md_dst_ops, NULL, 1, DST_OBSOLETE_NONE,
+ DST_METADATA | DST_NOCACHE | DST_NOCOUNT);
+
+ dst->input = dst_md_discard;
+ dst->output = dst_md_discard_sk;
+
+ memset(dst + 1, 0, sizeof(*md_dst) + optslen - sizeof(*dst));
+}
+
+struct metadata_dst *metadata_dst_alloc(u8 optslen, gfp_t flags)
+{
+ struct metadata_dst *md_dst;
+
+ md_dst = kmalloc(sizeof(*md_dst) + optslen, flags);
+ if (!md_dst)
+ return NULL;
+
+ __metadata_dst_init(md_dst, optslen);
+
+ return md_dst;
+}
+EXPORT_SYMBOL_GPL(metadata_dst_alloc);
+
+struct metadata_dst __percpu *metadata_dst_alloc_percpu(u8 optslen, gfp_t flags)
+{
+ int cpu;
+ struct metadata_dst __percpu *md_dst;
+
+ md_dst = __alloc_percpu_gfp(sizeof(struct metadata_dst) + optslen,
+ __alignof__(struct metadata_dst), flags);
+ if (!md_dst)
+ return NULL;
+
+ for_each_possible_cpu(cpu)
+ __metadata_dst_init(per_cpu_ptr(md_dst, cpu), optslen);
+
+ return md_dst;
+}
+EXPORT_SYMBOL_GPL(metadata_dst_alloc_percpu);
+
/* Dirty hack. We did it in 2.2 (in __dst_free),
* we have _very_ good reasons not to repeat
* this mistake in 2.3, but we have no choice
@@ -393,7 +475,7 @@ static struct notifier_block dst_dev_notifier = {
.priority = -10, /* must be called after other network notifiers */
};
-void __init dst_init(void)
+void __init dst_subsys_init(void)
{
register_netdevice_notifier(&dst_dev_notifier);
}
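
dst.c splits dst_init() out of dst_alloc() so that metadata dsts, which are plain kmalloc/per-cpu allocations rather than kmem_cache objects, can reuse the same initialisation. A minimal usage sketch, mirroring what the filter.c changes below do with the new helpers (the example_* names are assumptions):

static struct metadata_dst __percpu *example_md_dst;

static int example_setup(void)
{
	example_md_dst = metadata_dst_alloc_percpu(0, GFP_KERNEL);
	return example_md_dst ? 0 : -ENOMEM;
}

static void example_attach(struct sk_buff *skb)
{
	struct metadata_dst *md = this_cpu_ptr(example_md_dst);

	/* replace any existing dst with this CPU's metadata dst */
	skb_dst_drop(skb);
	dst_hold(&md->dst);
	skb_dst_set(skb, &md->dst);
	/* md->u.tun_info can now carry the egress tunnel key */
}
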
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 0ad144fb0..365de6643 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -16,6 +16,7 @@
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/fib_rules.h>
+#include <net/ip_tunnels.h>
int fib_default_rule_add(struct fib_rules_ops *ops,
u32 pref, u32 table, u32 flags)
@@ -43,7 +44,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
}
EXPORT_SYMBOL(fib_default_rule_add);
-u32 fib_default_rule_pref(struct fib_rules_ops *ops)
+static u32 fib_default_rule_pref(struct fib_rules_ops *ops)
{
struct list_head *pos;
struct fib_rule *rule;
@@ -59,7 +60,6 @@ u32 fib_default_rule_pref(struct fib_rules_ops *ops)
return 0;
}
-EXPORT_SYMBOL(fib_default_rule_pref);
static void notify_rule_change(int event, struct fib_rule *rule,
struct fib_rules_ops *ops, struct nlmsghdr *nlh,
@@ -186,6 +186,9 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
if ((rule->mark ^ fl->flowi_mark) & rule->mark_mask)
goto out;
+ if (rule->tun_id && (rule->tun_id != fl->flowi_tun_key.tun_id))
+ goto out;
+
ret = ops->match(rule, fl, flags);
out:
return (rule->flags & FIB_RULE_INVERT) ? !ret : ret;
@@ -295,8 +298,8 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh)
}
rule->fr_net = net;
- if (tb[FRA_PRIORITY])
- rule->pref = nla_get_u32(tb[FRA_PRIORITY]);
+ rule->pref = tb[FRA_PRIORITY] ? nla_get_u32(tb[FRA_PRIORITY])
+ : fib_default_rule_pref(ops);
if (tb[FRA_IIFNAME]) {
struct net_device *dev;
@@ -330,6 +333,9 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh)
if (tb[FRA_FWMASK])
rule->mark_mask = nla_get_u32(tb[FRA_FWMASK]);
+ if (tb[FRA_TUN_ID])
+ rule->tun_id = nla_get_be64(tb[FRA_TUN_ID]);
+
rule->action = frh->action;
rule->flags = frh->flags;
rule->table = frh_get_table(frh, tb);
@@ -343,9 +349,6 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh)
else
rule->suppress_ifgroup = -1;
- if (!tb[FRA_PRIORITY] && ops->default_pref)
- rule->pref = ops->default_pref(ops);
-
err = -EINVAL;
if (tb[FRA_GOTO]) {
if (rule->action != FR_ACT_GOTO)
@@ -407,6 +410,9 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh)
if (unresolved)
ops->unresolved_rules++;
+ if (rule->tun_id)
+ ip_tunnel_need_metadata();
+
notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).portid);
flush_route_cache(ops);
rules_ops_put(ops);
@@ -473,6 +479,10 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh)
(rule->mark_mask != nla_get_u32(tb[FRA_FWMASK])))
continue;
+ if (tb[FRA_TUN_ID] &&
+ (rule->tun_id != nla_get_be64(tb[FRA_TUN_ID])))
+ continue;
+
if (!ops->compare(rule, frh, tb))
continue;
@@ -487,6 +497,9 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh)
goto errout;
}
+ if (rule->tun_id)
+ ip_tunnel_unneed_metadata();
+
list_del_rcu(&rule->list);
if (rule->action == FR_ACT_GOTO) {
@@ -535,7 +548,8 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops,
+ nla_total_size(4) /* FRA_SUPPRESS_PREFIXLEN */
+ nla_total_size(4) /* FRA_SUPPRESS_IFGROUP */
+ nla_total_size(4) /* FRA_FWMARK */
- + nla_total_size(4); /* FRA_FWMASK */
+ + nla_total_size(4) /* FRA_FWMASK */
+ + nla_total_size(8); /* FRA_TUN_ID */
if (ops->nlmsg_payload)
payload += ops->nlmsg_payload(rule);
@@ -591,7 +605,9 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
((rule->mark_mask || rule->mark) &&
nla_put_u32(skb, FRA_FWMASK, rule->mark_mask)) ||
(rule->target &&
- nla_put_u32(skb, FRA_GOTO, rule->target)))
+ nla_put_u32(skb, FRA_GOTO, rule->target)) ||
+ (rule->tun_id &&
+ nla_put_be64(skb, FRA_TUN_ID, rule->tun_id)))
goto nla_put_failure;
if (rule->suppress_ifgroup != -1) {
diff --git a/net/core/filter.c b/net/core/filter.c
index 8dcdd86b6..bb18c3680 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -47,6 +47,8 @@
#include <linux/if_vlan.h>
#include <linux/bpf.h>
#include <net/sch_generic.h>
+#include <net/cls_cgroup.h>
+#include <net/dst_metadata.h>
/**
* sk_filter - run a packet through a socket filter
@@ -476,9 +478,9 @@ do_pass:
bpf_src = BPF_X;
} else {
insn->dst_reg = BPF_REG_A;
- insn->src_reg = BPF_REG_X;
insn->imm = fp->k;
bpf_src = BPF_SRC(fp->code);
+ insn->src_reg = bpf_src == BPF_X ? BPF_REG_X : 0;
}
/* Common case where 'jump_false' is next insn. */
@@ -1122,6 +1124,7 @@ int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog,
*pfp = fp;
return 0;
}
+EXPORT_SYMBOL_GPL(bpf_prog_create_from_user);
void bpf_prog_destroy(struct bpf_prog *fp)
{
@@ -1346,7 +1349,7 @@ const struct bpf_func_proto bpf_l3_csum_replace_proto = {
static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
{
struct sk_buff *skb = (struct sk_buff *) (long) r1;
- u32 is_pseudo = BPF_IS_PSEUDO_HEADER(flags);
+ bool is_pseudo = !!BPF_IS_PSEUDO_HEADER(flags);
int offset = (int) r2;
__sum16 sum, *ptr;
@@ -1425,6 +1428,139 @@ const struct bpf_func_proto bpf_clone_redirect_proto = {
.arg3_type = ARG_ANYTHING,
};
+static u64 bpf_get_cgroup_classid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+ return task_get_classid((struct sk_buff *) (unsigned long) r1);
+}
+
+static const struct bpf_func_proto bpf_get_cgroup_classid_proto = {
+ .func = bpf_get_cgroup_classid,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+};
+
+static u64 bpf_skb_vlan_push(u64 r1, u64 r2, u64 vlan_tci, u64 r4, u64 r5)
+{
+ struct sk_buff *skb = (struct sk_buff *) (long) r1;
+ __be16 vlan_proto = (__force __be16) r2;
+
+ if (unlikely(vlan_proto != htons(ETH_P_8021Q) &&
+ vlan_proto != htons(ETH_P_8021AD)))
+ vlan_proto = htons(ETH_P_8021Q);
+
+ return skb_vlan_push(skb, vlan_proto, vlan_tci);
+}
+
+const struct bpf_func_proto bpf_skb_vlan_push_proto = {
+ .func = bpf_skb_vlan_push,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+};
+EXPORT_SYMBOL_GPL(bpf_skb_vlan_push_proto);
+
+static u64 bpf_skb_vlan_pop(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+ struct sk_buff *skb = (struct sk_buff *) (long) r1;
+
+ return skb_vlan_pop(skb);
+}
+
+const struct bpf_func_proto bpf_skb_vlan_pop_proto = {
+ .func = bpf_skb_vlan_pop,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+};
+EXPORT_SYMBOL_GPL(bpf_skb_vlan_pop_proto);
+
+bool bpf_helper_changes_skb_data(void *func)
+{
+ if (func == bpf_skb_vlan_push)
+ return true;
+ if (func == bpf_skb_vlan_pop)
+ return true;
+ return false;
+}
+
+static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
+{
+ struct sk_buff *skb = (struct sk_buff *) (long) r1;
+ struct bpf_tunnel_key *to = (struct bpf_tunnel_key *) (long) r2;
+ struct ip_tunnel_info *info = skb_tunnel_info(skb);
+
+ if (unlikely(size != sizeof(struct bpf_tunnel_key) || flags || !info))
+ return -EINVAL;
+ if (ip_tunnel_info_af(info) != AF_INET)
+ return -EINVAL;
+
+ to->tunnel_id = be64_to_cpu(info->key.tun_id);
+ to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src);
+
+ return 0;
+}
+
+const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = {
+ .func = bpf_skb_get_tunnel_key,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_STACK,
+ .arg3_type = ARG_CONST_STACK_SIZE,
+ .arg4_type = ARG_ANYTHING,
+};
+
+static struct metadata_dst __percpu *md_dst;
+
+static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
+{
+ struct sk_buff *skb = (struct sk_buff *) (long) r1;
+ struct bpf_tunnel_key *from = (struct bpf_tunnel_key *) (long) r2;
+ struct metadata_dst *md = this_cpu_ptr(md_dst);
+ struct ip_tunnel_info *info;
+
+ if (unlikely(size != sizeof(struct bpf_tunnel_key) || flags))
+ return -EINVAL;
+
+ skb_dst_drop(skb);
+ dst_hold((struct dst_entry *) md);
+ skb_dst_set(skb, (struct dst_entry *) md);
+
+ info = &md->u.tun_info;
+ info->mode = IP_TUNNEL_INFO_TX;
+ info->key.tun_flags = TUNNEL_KEY;
+ info->key.tun_id = cpu_to_be64(from->tunnel_id);
+ info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4);
+
+ return 0;
+}
+
+const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = {
+ .func = bpf_skb_set_tunnel_key,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_STACK,
+ .arg3_type = ARG_CONST_STACK_SIZE,
+ .arg4_type = ARG_ANYTHING,
+};
+
+static const struct bpf_func_proto *bpf_get_skb_set_tunnel_key_proto(void)
+{
+ if (!md_dst) {
+ /* race is not possible, since it's called from
+ * verifier that is holding verifier mutex
+ */
+ md_dst = metadata_dst_alloc_percpu(0, GFP_KERNEL);
+ if (!md_dst)
+ return NULL;
+ }
+ return &bpf_skb_set_tunnel_key_proto;
+}
+
static const struct bpf_func_proto *
sk_filter_func_proto(enum bpf_func_id func_id)
{
@@ -1462,6 +1598,16 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
return &bpf_l4_csum_replace_proto;
case BPF_FUNC_clone_redirect:
return &bpf_clone_redirect_proto;
+ case BPF_FUNC_get_cgroup_classid:
+ return &bpf_get_cgroup_classid_proto;
+ case BPF_FUNC_skb_vlan_push:
+ return &bpf_skb_vlan_push_proto;
+ case BPF_FUNC_skb_vlan_pop:
+ return &bpf_skb_vlan_pop_proto;
+ case BPF_FUNC_skb_get_tunnel_key:
+ return &bpf_skb_get_tunnel_key_proto;
+ case BPF_FUNC_skb_set_tunnel_key:
+ return bpf_get_skb_set_tunnel_key_proto();
default:
return sk_filter_func_proto(func_id);
}
@@ -1570,6 +1716,13 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
offsetof(struct net_device, ifindex));
break;
+ case offsetof(struct __sk_buff, hash):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
+
+ *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+ offsetof(struct sk_buff, hash));
+ break;
+
case offsetof(struct __sk_buff, mark):
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
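
filter.c wires several new helpers into the tc/cls_act program type: cgroup classid, vlan push/pop and, via the per-cpu metadata dst above, bpf_skb_get_tunnel_key()/bpf_skb_set_tunnel_key(). A rough restricted-C sketch of a tc eBPF program using the tunnel-key helpers follows; SEC(), bpf_helpers.h and the concrete values are assumptions borrowed from the samples/bpf style, not part of this patch:

#include <linux/bpf.h>
#include "bpf_helpers.h"

SEC("vxlan_set_key")
int set_key(struct __sk_buff *skb)
{
	struct bpf_tunnel_key key = {};

	key.tunnel_id = 42;
	key.remote_ipv4 = 0xac100164;		/* 172.16.1.100 */
	bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0);
	return 0;				/* TC_ACT_OK */
}

SEC("vxlan_get_key")
int get_key(struct __sk_buff *skb)
{
	struct bpf_tunnel_key key = {};

	if (bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0) < 0)
		return 0;
	/* key.tunnel_id / key.remote_ipv4 now describe the outer header */
	return 0;
}

char _license[] SEC("license") = "GPL";
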
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 2a834c617..d79699c9d 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -19,14 +19,14 @@
#include <net/flow_dissector.h>
#include <scsi/fc/fc_fcoe.h>
-static bool skb_flow_dissector_uses_key(struct flow_dissector *flow_dissector,
- enum flow_dissector_key_id key_id)
+static bool dissector_uses_key(const struct flow_dissector *flow_dissector,
+ enum flow_dissector_key_id key_id)
{
return flow_dissector->used_keys & (1 << key_id);
}
-static void skb_flow_dissector_set_key(struct flow_dissector *flow_dissector,
- enum flow_dissector_key_id key_id)
+static void dissector_set_key(struct flow_dissector *flow_dissector,
+ enum flow_dissector_key_id key_id)
{
flow_dissector->used_keys |= (1 << key_id);
}
@@ -51,20 +51,20 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
* boundaries of unsigned short.
*/
BUG_ON(key->offset > USHRT_MAX);
- BUG_ON(skb_flow_dissector_uses_key(flow_dissector,
- key->key_id));
+ BUG_ON(dissector_uses_key(flow_dissector,
+ key->key_id));
- skb_flow_dissector_set_key(flow_dissector, key->key_id);
+ dissector_set_key(flow_dissector, key->key_id);
flow_dissector->offset[key->key_id] = key->offset;
}
/* Ensure that the dissector always includes control and basic key.
* That way we are able to avoid handling lack of these in fast path.
*/
- BUG_ON(!skb_flow_dissector_uses_key(flow_dissector,
- FLOW_DISSECTOR_KEY_CONTROL));
- BUG_ON(!skb_flow_dissector_uses_key(flow_dissector,
- FLOW_DISSECTOR_KEY_BASIC));
+ BUG_ON(!dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_CONTROL));
+ BUG_ON(!dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_BASIC));
}
EXPORT_SYMBOL(skb_flow_dissector_init);
@@ -121,7 +121,8 @@ EXPORT_SYMBOL(__skb_flow_get_ports);
bool __skb_flow_dissect(const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
void *target_container,
- void *data, __be16 proto, int nhoff, int hlen)
+ void *data, __be16 proto, int nhoff, int hlen,
+ unsigned int flags)
{
struct flow_dissector_key_control *key_control;
struct flow_dissector_key_basic *key_basic;
@@ -130,6 +131,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
struct flow_dissector_key_tags *key_tags;
struct flow_dissector_key_keyid *key_keyid;
u8 ip_proto = 0;
+ bool ret = false;
if (!data) {
data = skb->data;
@@ -152,8 +154,8 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
FLOW_DISSECTOR_KEY_BASIC,
target_container);
- if (skb_flow_dissector_uses_key(flow_dissector,
- FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+ if (dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
struct ethhdr *eth = eth_hdr(skb);
struct flow_dissector_key_eth_addrs *key_eth_addrs;
@@ -171,15 +173,13 @@ again:
ip:
iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph);
if (!iph || iph->ihl < 5)
- return false;
+ goto out_bad;
nhoff += iph->ihl * 4;
ip_proto = iph->protocol;
- if (ip_is_fragment(iph))
- ip_proto = 0;
- if (!skb_flow_dissector_uses_key(flow_dissector,
- FLOW_DISSECTOR_KEY_IPV4_ADDRS))
+ if (!dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_IPV4_ADDRS))
break;
key_addrs = skb_flow_dissector_target(flow_dissector,
@@ -187,6 +187,22 @@ ip:
memcpy(&key_addrs->v4addrs, &iph->saddr,
sizeof(key_addrs->v4addrs));
key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+
+ if (ip_is_fragment(iph)) {
+ key_control->flags |= FLOW_DIS_IS_FRAGMENT;
+
+ if (iph->frag_off & htons(IP_OFFSET)) {
+ goto out_good;
+ } else {
+ key_control->flags |= FLOW_DIS_FIRST_FRAG;
+ if (!(flags & FLOW_DISSECTOR_F_PARSE_1ST_FRAG))
+ goto out_good;
+ }
+ }
+
+ if (flags & FLOW_DISSECTOR_F_STOP_AT_L3)
+ goto out_good;
+
break;
}
case htons(ETH_P_IPV6): {
@@ -197,13 +213,13 @@ ip:
ipv6:
iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph);
if (!iph)
- return false;
+ goto out_bad;
ip_proto = iph->nexthdr;
nhoff += sizeof(struct ipv6hdr);
- if (skb_flow_dissector_uses_key(flow_dissector,
- FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
+ if (dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
struct flow_dissector_key_ipv6_addrs *key_ipv6_addrs;
key_ipv6_addrs = skb_flow_dissector_target(flow_dissector,
@@ -216,15 +232,20 @@ ipv6:
flow_label = ip6_flowlabel(iph);
if (flow_label) {
- if (skb_flow_dissector_uses_key(flow_dissector,
- FLOW_DISSECTOR_KEY_FLOW_LABEL)) {
+ if (dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_FLOW_LABEL)) {
key_tags = skb_flow_dissector_target(flow_dissector,
FLOW_DISSECTOR_KEY_FLOW_LABEL,
target_container);
key_tags->flow_label = ntohl(flow_label);
}
+ if (flags & FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL)
+ goto out_good;
}
+ if (flags & FLOW_DISSECTOR_F_STOP_AT_L3)
+ goto out_good;
+
break;
}
case htons(ETH_P_8021AD):
@@ -234,10 +255,10 @@ ipv6:
vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan), data, hlen, &_vlan);
if (!vlan)
- return false;
+ goto out_bad;
- if (skb_flow_dissector_uses_key(flow_dissector,
- FLOW_DISSECTOR_KEY_VLANID)) {
+ if (dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_VLANID)) {
key_tags = skb_flow_dissector_target(flow_dissector,
FLOW_DISSECTOR_KEY_VLANID,
target_container);
@@ -256,7 +277,7 @@ ipv6:
} *hdr, _hdr;
hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
if (!hdr)
- return false;
+ goto out_bad;
proto = hdr->proto;
nhoff += PPPOE_SES_HLEN;
switch (proto) {
@@ -265,7 +286,7 @@ ipv6:
case htons(PPP_IPV6):
goto ipv6;
default:
- return false;
+ goto out_bad;
}
}
case htons(ETH_P_TIPC): {
@@ -275,19 +296,17 @@ ipv6:
} *hdr, _hdr;
hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
if (!hdr)
- return false;
- key_basic->n_proto = proto;
- key_control->thoff = (u16)nhoff;
+ goto out_bad;
- if (skb_flow_dissector_uses_key(flow_dissector,
- FLOW_DISSECTOR_KEY_TIPC_ADDRS)) {
+ if (dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_TIPC_ADDRS)) {
key_addrs = skb_flow_dissector_target(flow_dissector,
FLOW_DISSECTOR_KEY_TIPC_ADDRS,
target_container);
key_addrs->tipcaddrs.srcnode = hdr->srcnode;
key_control->addr_type = FLOW_DISSECTOR_KEY_TIPC_ADDRS;
}
- return true;
+ goto out_good;
}
case htons(ETH_P_MPLS_UC):
@@ -297,12 +316,12 @@ mpls:
hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data,
hlen, &_hdr);
if (!hdr)
- return false;
+ goto out_bad;
if ((ntohl(hdr[0].entry) & MPLS_LS_LABEL_MASK) >>
MPLS_LS_LABEL_SHIFT == MPLS_LABEL_ENTROPY) {
- if (skb_flow_dissector_uses_key(flow_dissector,
- FLOW_DISSECTOR_KEY_MPLS_ENTROPY)) {
+ if (dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_MPLS_ENTROPY)) {
key_keyid = skb_flow_dissector_target(flow_dissector,
FLOW_DISSECTOR_KEY_MPLS_ENTROPY,
target_container);
@@ -310,21 +329,17 @@ mpls:
htonl(MPLS_LS_LABEL_MASK);
}
- key_basic->n_proto = proto;
- key_basic->ip_proto = ip_proto;
- key_control->thoff = (u16)nhoff;
-
- return true;
+ goto out_good;
}
- return true;
+ goto out_good;
}
case htons(ETH_P_FCOE):
key_control->thoff = (u16)(nhoff + FCOE_HEADER_LEN);
/* fall through */
default:
- return false;
+ goto out_bad;
}
ip_proto_again:
@@ -337,7 +352,7 @@ ip_proto_again:
hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
if (!hdr)
- return false;
+ goto out_bad;
/*
* Only look inside GRE if version zero and no
* routing
@@ -357,10 +372,10 @@ ip_proto_again:
data, hlen, &_keyid);
if (!keyid)
- return false;
+ goto out_bad;
- if (skb_flow_dissector_uses_key(flow_dissector,
- FLOW_DISSECTOR_KEY_GRE_KEYID)) {
+ if (dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_GRE_KEYID)) {
key_keyid = skb_flow_dissector_target(flow_dissector,
FLOW_DISSECTOR_KEY_GRE_KEYID,
target_container);
@@ -378,10 +393,15 @@ ip_proto_again:
sizeof(_eth),
data, hlen, &_eth);
if (!eth)
- return false;
+ goto out_bad;
proto = eth->h_proto;
nhoff += sizeof(*eth);
}
+
+ key_control->flags |= FLOW_DIS_ENCAPSULATION;
+ if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
+ goto out_good;
+
goto again;
}
case NEXTHDR_HOP:
@@ -395,18 +415,53 @@ ip_proto_again:
opthdr = __skb_header_pointer(skb, nhoff, sizeof(_opthdr),
data, hlen, &_opthdr);
if (!opthdr)
- return false;
+ goto out_bad;
ip_proto = opthdr[0];
nhoff += (opthdr[1] + 1) << 3;
goto ip_proto_again;
}
+ case NEXTHDR_FRAGMENT: {
+ struct frag_hdr _fh, *fh;
+
+ if (proto != htons(ETH_P_IPV6))
+ break;
+
+ fh = __skb_header_pointer(skb, nhoff, sizeof(_fh),
+ data, hlen, &_fh);
+
+ if (!fh)
+ goto out_bad;
+
+ key_control->flags |= FLOW_DIS_IS_FRAGMENT;
+
+ nhoff += sizeof(_fh);
+
+ if (!(fh->frag_off & htons(IP6_OFFSET))) {
+ key_control->flags |= FLOW_DIS_FIRST_FRAG;
+ if (flags & FLOW_DISSECTOR_F_PARSE_1ST_FRAG) {
+ ip_proto = fh->nexthdr;
+ goto ip_proto_again;
+ }
+ }
+ goto out_good;
+ }
case IPPROTO_IPIP:
proto = htons(ETH_P_IP);
+
+ key_control->flags |= FLOW_DIS_ENCAPSULATION;
+ if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
+ goto out_good;
+
goto ip;
case IPPROTO_IPV6:
proto = htons(ETH_P_IPV6);
+
+ key_control->flags |= FLOW_DIS_ENCAPSULATION;
+ if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
+ goto out_good;
+
goto ipv6;
case IPPROTO_MPLS:
proto = htons(ETH_P_MPLS_UC);
@@ -415,12 +470,8 @@ ip_proto_again:
break;
}
- key_basic->n_proto = proto;
- key_basic->ip_proto = ip_proto;
- key_control->thoff = (u16)nhoff;
-
- if (skb_flow_dissector_uses_key(flow_dissector,
- FLOW_DISSECTOR_KEY_PORTS)) {
+ if (dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_PORTS)) {
key_ports = skb_flow_dissector_target(flow_dissector,
FLOW_DISSECTOR_KEY_PORTS,
target_container);
@@ -428,7 +479,15 @@ ip_proto_again:
data, hlen);
}
- return true;
+out_good:
+ ret = true;
+
+out_bad:
+ key_basic->n_proto = proto;
+ key_basic->ip_proto = ip_proto;
+ key_control->thoff = (u16)nhoff;
+
+ return ret;
}
EXPORT_SYMBOL(__skb_flow_dissect);
@@ -438,18 +497,21 @@ static __always_inline void __flow_hash_secret_init(void)
net_get_random_once(&hashrnd, sizeof(hashrnd));
}
-static __always_inline u32 __flow_hash_words(u32 *words, u32 length, u32 keyval)
+static __always_inline u32 __flow_hash_words(const u32 *words, u32 length,
+ u32 keyval)
{
return jhash2(words, length, keyval);
}
-static inline void *flow_keys_hash_start(struct flow_keys *flow)
+static inline const u32 *flow_keys_hash_start(const struct flow_keys *flow)
{
+ const void *p = flow;
+
BUILD_BUG_ON(FLOW_KEYS_HASH_OFFSET % sizeof(u32));
- return (void *)flow + FLOW_KEYS_HASH_OFFSET;
+ return (const u32 *)(p + FLOW_KEYS_HASH_OFFSET);
}
-static inline size_t flow_keys_hash_length(struct flow_keys *flow)
+static inline size_t flow_keys_hash_length(const struct flow_keys *flow)
{
size_t diff = FLOW_KEYS_HASH_OFFSET + sizeof(flow->addrs);
BUILD_BUG_ON((sizeof(*flow) - FLOW_KEYS_HASH_OFFSET) % sizeof(u32));
@@ -539,7 +601,7 @@ static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval)
__flow_hash_consistentify(keys);
- hash = __flow_hash_words((u32 *)flow_keys_hash_start(keys),
+ hash = __flow_hash_words(flow_keys_hash_start(keys),
flow_keys_hash_length(keys), keyval);
if (!hash)
hash = 1;
@@ -557,8 +619,8 @@ EXPORT_SYMBOL(flow_hash_from_keys);
static inline u32 ___skb_get_hash(const struct sk_buff *skb,
struct flow_keys *keys, u32 keyval)
{
- if (!skb_flow_dissect_flow_keys(skb, keys))
- return 0;
+ skb_flow_dissect_flow_keys(skb, keys,
+ FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
return __flow_hash_from_keys(keys, keyval);
}
@@ -602,17 +664,11 @@ EXPORT_SYMBOL(make_flow_keys_digest);
void __skb_get_hash(struct sk_buff *skb)
{
struct flow_keys keys;
- u32 hash;
__flow_hash_secret_init();
- hash = ___skb_get_hash(skb, &keys, hashrnd);
- if (!hash)
- return;
- if (keys.ports.ports)
- skb->l4_hash = 1;
- skb->sw_hash = 1;
- skb->hash = hash;
+ __skb_set_sw_hash(skb, ___skb_get_hash(skb, &keys, hashrnd),
+ flow_keys_have_l4(&keys));
}
EXPORT_SYMBOL(__skb_get_hash);
@@ -624,6 +680,51 @@ __u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb)
}
EXPORT_SYMBOL(skb_get_hash_perturb);
+__u32 __skb_get_hash_flowi6(struct sk_buff *skb, const struct flowi6 *fl6)
+{
+ struct flow_keys keys;
+
+ memset(&keys, 0, sizeof(keys));
+
+ memcpy(&keys.addrs.v6addrs.src, &fl6->saddr,
+ sizeof(keys.addrs.v6addrs.src));
+ memcpy(&keys.addrs.v6addrs.dst, &fl6->daddr,
+ sizeof(keys.addrs.v6addrs.dst));
+ keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ keys.ports.src = fl6->fl6_sport;
+ keys.ports.dst = fl6->fl6_dport;
+ keys.keyid.keyid = fl6->fl6_gre_key;
+ keys.tags.flow_label = (__force u32)fl6->flowlabel;
+ keys.basic.ip_proto = fl6->flowi6_proto;
+
+ __skb_set_sw_hash(skb, flow_hash_from_keys(&keys),
+ flow_keys_have_l4(&keys));
+
+ return skb->hash;
+}
+EXPORT_SYMBOL(__skb_get_hash_flowi6);
+
+__u32 __skb_get_hash_flowi4(struct sk_buff *skb, const struct flowi4 *fl4)
+{
+ struct flow_keys keys;
+
+ memset(&keys, 0, sizeof(keys));
+
+ keys.addrs.v4addrs.src = fl4->saddr;
+ keys.addrs.v4addrs.dst = fl4->daddr;
+ keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+ keys.ports.src = fl4->fl4_sport;
+ keys.ports.dst = fl4->fl4_dport;
+ keys.keyid.keyid = fl4->fl4_gre_key;
+ keys.basic.ip_proto = fl4->flowi4_proto;
+
+ __skb_set_sw_hash(skb, flow_hash_from_keys(&keys),
+ flow_keys_have_l4(&keys));
+
+ return skb->hash;
+}
+EXPORT_SYMBOL(__skb_get_hash_flowi4);
+
u32 __skb_get_poff(const struct sk_buff *skb, void *data,
const struct flow_keys *keys, int hlen)
{
@@ -683,12 +784,47 @@ u32 skb_get_poff(const struct sk_buff *skb)
{
struct flow_keys keys;
- if (!skb_flow_dissect_flow_keys(skb, &keys))
+ if (!skb_flow_dissect_flow_keys(skb, &keys, 0))
return 0;
return __skb_get_poff(skb, skb->data, &keys, skb_headlen(skb));
}
+__u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys)
+{
+ memset(keys, 0, sizeof(*keys));
+
+ memcpy(&keys->addrs.v6addrs.src, &fl6->saddr,
+ sizeof(keys->addrs.v6addrs.src));
+ memcpy(&keys->addrs.v6addrs.dst, &fl6->daddr,
+ sizeof(keys->addrs.v6addrs.dst));
+ keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ keys->ports.src = fl6->fl6_sport;
+ keys->ports.dst = fl6->fl6_dport;
+ keys->keyid.keyid = fl6->fl6_gre_key;
+ keys->tags.flow_label = (__force u32)fl6->flowlabel;
+ keys->basic.ip_proto = fl6->flowi6_proto;
+
+ return flow_hash_from_keys(keys);
+}
+EXPORT_SYMBOL(__get_hash_from_flowi6);
+
+__u32 __get_hash_from_flowi4(const struct flowi4 *fl4, struct flow_keys *keys)
+{
+ memset(keys, 0, sizeof(*keys));
+
+ keys->addrs.v4addrs.src = fl4->saddr;
+ keys->addrs.v4addrs.dst = fl4->daddr;
+ keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+ keys->ports.src = fl4->fl4_sport;
+ keys->ports.dst = fl4->fl4_dport;
+ keys->keyid.keyid = fl4->fl4_gre_key;
+ keys->basic.ip_proto = fl4->flowi4_proto;
+
+ return flow_hash_from_keys(keys);
+}
+EXPORT_SYMBOL(__get_hash_from_flowi4);
+
static const struct flow_dissector_key flow_keys_dissector_keys[] = {
{
.key_id = FLOW_DISSECTOR_KEY_CONTROL,
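
With the new flags argument, __skb_flow_dissect() can stop early (at L3, at the IPv6 flow label, at the first fragment or at an encapsulation boundary), and it now fills in n_proto/ip_proto/thoff even on the out_bad path. A small caller sketch, with the example_ name being an assumption:

static bool example_dissect_l3_only(struct sk_buff *skb,
				    struct flow_keys *keys)
{
	/* addresses are parsed; fragment offsets and L4 ports are skipped */
	return skb_flow_dissect_flow_keys(skb, keys,
					  FLOW_DISSECTOR_F_STOP_AT_L3);
}
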
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
new file mode 100644
index 000000000..dfb1a9ca0
--- /dev/null
+++ b/net/core/lwtunnel.c
@@ -0,0 +1,249 @@
+/*
+ * lwtunnel Infrastructure for light weight tunnels like mpls
+ *
+ * Authors: Roopa Prabhu, <roopa@cumulusnetworks.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/capability.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/lwtunnel.h>
+#include <linux/in.h>
+#include <linux/init.h>
+#include <linux/err.h>
+
+#include <net/lwtunnel.h>
+#include <net/rtnetlink.h>
+#include <net/ip6_fib.h>
+
+struct lwtunnel_state *lwtunnel_state_alloc(int encap_len)
+{
+ struct lwtunnel_state *lws;
+
+ lws = kzalloc(sizeof(*lws) + encap_len, GFP_ATOMIC);
+
+ return lws;
+}
+EXPORT_SYMBOL(lwtunnel_state_alloc);
+
+static const struct lwtunnel_encap_ops __rcu *
+ lwtun_encaps[LWTUNNEL_ENCAP_MAX + 1] __read_mostly;
+
+int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *ops,
+ unsigned int num)
+{
+ if (num > LWTUNNEL_ENCAP_MAX)
+ return -ERANGE;
+
+ return !cmpxchg((const struct lwtunnel_encap_ops **)
+ &lwtun_encaps[num],
+ NULL, ops) ? 0 : -1;
+}
+EXPORT_SYMBOL(lwtunnel_encap_add_ops);
+
+int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *ops,
+ unsigned int encap_type)
+{
+ int ret;
+
+ if (encap_type == LWTUNNEL_ENCAP_NONE ||
+ encap_type > LWTUNNEL_ENCAP_MAX)
+ return -ERANGE;
+
+ ret = (cmpxchg((const struct lwtunnel_encap_ops **)
+ &lwtun_encaps[encap_type],
+ ops, NULL) == ops) ? 0 : -1;
+
+ synchronize_net();
+
+ return ret;
+}
+EXPORT_SYMBOL(lwtunnel_encap_del_ops);
+
+int lwtunnel_build_state(struct net_device *dev, u16 encap_type,
+ struct nlattr *encap, unsigned int family,
+ const void *cfg, struct lwtunnel_state **lws)
+{
+ const struct lwtunnel_encap_ops *ops;
+ int ret = -EINVAL;
+
+ if (encap_type == LWTUNNEL_ENCAP_NONE ||
+ encap_type > LWTUNNEL_ENCAP_MAX)
+ return ret;
+
+ ret = -EOPNOTSUPP;
+ rcu_read_lock();
+ ops = rcu_dereference(lwtun_encaps[encap_type]);
+ if (likely(ops && ops->build_state))
+ ret = ops->build_state(dev, encap, family, cfg, lws);
+ rcu_read_unlock();
+
+ return ret;
+}
+EXPORT_SYMBOL(lwtunnel_build_state);
+
+int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate)
+{
+ const struct lwtunnel_encap_ops *ops;
+ struct nlattr *nest;
+ int ret = -EINVAL;
+
+ if (!lwtstate)
+ return 0;
+
+ if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
+ lwtstate->type > LWTUNNEL_ENCAP_MAX)
+ return 0;
+
+ ret = -EOPNOTSUPP;
+ nest = nla_nest_start(skb, RTA_ENCAP);
+ rcu_read_lock();
+ ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
+ if (likely(ops && ops->fill_encap))
+ ret = ops->fill_encap(skb, lwtstate);
+ rcu_read_unlock();
+
+ if (ret)
+ goto nla_put_failure;
+ nla_nest_end(skb, nest);
+ ret = nla_put_u16(skb, RTA_ENCAP_TYPE, lwtstate->type);
+ if (ret)
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+
+ return (ret == -EOPNOTSUPP ? 0 : ret);
+}
+EXPORT_SYMBOL(lwtunnel_fill_encap);
+
+int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate)
+{
+ const struct lwtunnel_encap_ops *ops;
+ int ret = 0;
+
+ if (!lwtstate)
+ return 0;
+
+ if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
+ lwtstate->type > LWTUNNEL_ENCAP_MAX)
+ return 0;
+
+ rcu_read_lock();
+ ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
+ if (likely(ops && ops->get_encap_size))
+ ret = nla_total_size(ops->get_encap_size(lwtstate));
+ rcu_read_unlock();
+
+ return ret;
+}
+EXPORT_SYMBOL(lwtunnel_get_encap_size);
+
+int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b)
+{
+ const struct lwtunnel_encap_ops *ops;
+ int ret = 0;
+
+ if (!a && !b)
+ return 0;
+
+ if (!a || !b)
+ return 1;
+
+ if (a->type != b->type)
+ return 1;
+
+ if (a->type == LWTUNNEL_ENCAP_NONE ||
+ a->type > LWTUNNEL_ENCAP_MAX)
+ return 0;
+
+ rcu_read_lock();
+ ops = rcu_dereference(lwtun_encaps[a->type]);
+ if (likely(ops && ops->cmp_encap))
+ ret = ops->cmp_encap(a, b);
+ rcu_read_unlock();
+
+ return ret;
+}
+EXPORT_SYMBOL(lwtunnel_cmp_encap);
+
+int lwtunnel_output(struct sock *sk, struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb_dst(skb);
+ const struct lwtunnel_encap_ops *ops;
+ struct lwtunnel_state *lwtstate;
+ int ret = -EINVAL;
+
+ if (!dst)
+ goto drop;
+ lwtstate = dst->lwtstate;
+
+ if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
+ lwtstate->type > LWTUNNEL_ENCAP_MAX)
+ return 0;
+
+ ret = -EOPNOTSUPP;
+ rcu_read_lock();
+ ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
+ if (likely(ops && ops->output))
+ ret = ops->output(sk, skb);
+ rcu_read_unlock();
+
+ if (ret == -EOPNOTSUPP)
+ goto drop;
+
+ return ret;
+
+drop:
+ kfree_skb(skb);
+
+ return ret;
+}
+EXPORT_SYMBOL(lwtunnel_output);
+
+int lwtunnel_input(struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb_dst(skb);
+ const struct lwtunnel_encap_ops *ops;
+ struct lwtunnel_state *lwtstate;
+ int ret = -EINVAL;
+
+ if (!dst)
+ goto drop;
+ lwtstate = dst->lwtstate;
+
+ if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
+ lwtstate->type > LWTUNNEL_ENCAP_MAX)
+ return 0;
+
+ ret = -EOPNOTSUPP;
+ rcu_read_lock();
+ ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
+ if (likely(ops && ops->input))
+ ret = ops->input(skb);
+ rcu_read_unlock();
+
+ if (ret == -EOPNOTSUPP)
+ goto drop;
+
+ return ret;
+
+drop:
+ kfree_skb(skb);
+
+ return ret;
+}
+EXPORT_SYMBOL(lwtunnel_input);
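
The new net/core/lwtunnel.c only dispatches to per-encap handlers; an encap provider registers through lwtunnel_encap_add_ops(). A stripped-down provider sketch is below — the foo_* names are invented, LWTUNNEL_ENCAP_MPLS is assumed to come from the uapi header added by this series, and the output stub merely consumes the skb instead of pushing a real header:

#include <linux/module.h>
#include <linux/skbuff.h>
#include <net/lwtunnel.h>

static int foo_output(struct sock *sk, struct sk_buff *skb)
{
	/* a real provider would push its encap header here and hand the
	 * skb back to the stack; this stub just swallows it
	 */
	consume_skb(skb);
	return 0;
}

static const struct lwtunnel_encap_ops foo_encap_ops = {
	.output	= foo_output,
};

static int __init foo_lwt_init(void)
{
	return lwtunnel_encap_add_ops(&foo_encap_ops, LWTUNNEL_ENCAP_MPLS);
}
module_init(foo_lwt_init);

static void __exit foo_lwt_exit(void)
{
	lwtunnel_encap_del_ops(&foo_encap_ops, LWTUNNEL_ENCAP_MPLS);
}
module_exit(foo_lwt_exit);
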
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 84195dacb..2b515ba7e 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -274,8 +274,12 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device
(entries >= tbl->gc_thresh2 &&
time_after(now, tbl->last_flush + 5 * HZ))) {
if (!neigh_forced_gc(tbl) &&
- entries >= tbl->gc_thresh3)
+ entries >= tbl->gc_thresh3) {
+ net_info_ratelimited("%s: neighbor table overflow!\n",
+ tbl->id);
+ NEIGH_CACHE_STAT_INC(tbl, table_fulls);
goto out_entries;
+ }
}
n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
@@ -1849,6 +1853,7 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
ndst.ndts_rcv_probes_ucast += st->rcv_probes_ucast;
ndst.ndts_periodic_gc_runs += st->periodic_gc_runs;
ndst.ndts_forced_gc_runs += st->forced_gc_runs;
+ ndst.ndts_table_fulls += st->table_fulls;
}
if (nla_put(skb, NDTA_STATS, sizeof(ndst), &ndst))
@@ -2717,12 +2722,12 @@ static int neigh_stat_seq_show(struct seq_file *seq, void *v)
struct neigh_statistics *st = v;
if (v == SEQ_START_TOKEN) {
- seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards\n");
+ seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
return 0;
}
seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx "
- "%08lx %08lx %08lx %08lx %08lx\n",
+ "%08lx %08lx %08lx %08lx %08lx %08lx\n",
atomic_read(&tbl->entries),
st->allocs,
@@ -2739,7 +2744,8 @@ static int neigh_stat_seq_show(struct seq_file *seq, void *v)
st->periodic_gc_runs,
st->forced_gc_runs,
- st->unres_discards
+ st->unres_discards,
+ st->table_fulls
);
return 0;
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 18b34d771..830f8a7c1 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -31,7 +31,6 @@
static const char fmt_hex[] = "%#x\n";
static const char fmt_long_hex[] = "%#lx\n";
static const char fmt_dec[] = "%d\n";
-static const char fmt_udec[] = "%u\n";
static const char fmt_ulong[] = "%lu\n";
static const char fmt_u64[] = "%llu\n";
@@ -202,7 +201,7 @@ static ssize_t speed_show(struct device *dev,
if (netif_running(netdev)) {
struct ethtool_cmd cmd;
if (!__ethtool_get_settings(netdev, &cmd))
- ret = sprintf(buf, fmt_udec, ethtool_cmd_speed(&cmd));
+ ret = sprintf(buf, fmt_dec, ethtool_cmd_speed(&cmd));
}
rtnl_unlock();
return ret;
@@ -404,6 +403,19 @@ static ssize_t group_store(struct device *dev, struct device_attribute *attr,
NETDEVICE_SHOW(group, fmt_dec);
static DEVICE_ATTR(netdev_group, S_IRUGO | S_IWUSR, group_show, group_store);
+static int change_proto_down(struct net_device *dev, unsigned long proto_down)
+{
+ return dev_change_proto_down(dev, (bool) proto_down);
+}
+
+static ssize_t proto_down_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ return netdev_store(dev, attr, buf, len, change_proto_down);
+}
+NETDEVICE_SHOW_RW(proto_down, fmt_dec);
+
static ssize_t phys_port_id_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -501,6 +513,7 @@ static struct attribute *net_class_attrs[] = {
&dev_attr_phys_port_id.attr,
&dev_attr_phys_port_name.attr,
&dev_attr_phys_switch_id.attr,
+ &dev_attr_proto_down.attr,
NULL,
};
ATTRIBUTE_GROUPS(net_class);
@@ -675,7 +688,7 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue,
struct rps_map *old_map, *map;
cpumask_var_t mask;
int err, cpu, i;
- static DEFINE_SPINLOCK(rps_map_lock);
+ static DEFINE_MUTEX(rps_map_mutex);
if (!capable(CAP_NET_ADMIN))
return -EPERM;
@@ -708,18 +721,21 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue,
map = NULL;
}
- spin_lock(&rps_map_lock);
+ mutex_lock(&rps_map_mutex);
old_map = rcu_dereference_protected(queue->rps_map,
- lockdep_is_held(&rps_map_lock));
+ mutex_is_locked(&rps_map_mutex));
rcu_assign_pointer(queue->rps_map, map);
- spin_unlock(&rps_map_lock);
if (map)
static_key_slow_inc(&rps_needed);
- if (old_map) {
- kfree_rcu(old_map, rcu);
+ if (old_map)
static_key_slow_dec(&rps_needed);
- }
+
+ mutex_unlock(&rps_map_mutex);
+
+ if (old_map)
+ kfree_rcu(old_map, rcu);
+
free_cpumask_var(mask);
return len;
}
@@ -1464,6 +1480,15 @@ static int of_dev_node_match(struct device *dev, const void *data)
return ret == 0 ? dev->of_node == data : ret;
}
+/*
+ * of_find_net_device_by_node - lookup the net device for the device node
+ * @np: OF device node
+ *
+ * Looks up the net_device structure corresponding with the device node.
+ * If successful, returns a pointer to the net_device with the embedded
+ * struct device refcount incremented by one, or NULL on failure. The
+ * refcount must be dropped when done with the net_device.
+ */
struct net_device *of_find_net_device_by_node(struct device_node *np)
{
struct device *dev;
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index ba3c01207..adef015b2 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -31,6 +31,7 @@
#include <trace/events/napi.h>
#include <trace/events/sock.h>
#include <trace/events/udp.h>
+#include <trace/events/fib.h>
EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index c126a878c..8bdada242 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -142,7 +142,7 @@ static void queue_process(struct work_struct *work)
*/
static int poll_one_napi(struct napi_struct *napi, int budget)
{
- int work;
+ int work = 0;
/* net_rx_action's ->poll() invocations and our's are
* synchronized by this test which is only made while
@@ -151,7 +151,12 @@ static int poll_one_napi(struct napi_struct *napi, int budget)
if (!test_bit(NAPI_STATE_SCHED, &napi->state))
return budget;
- set_bit(NAPI_STATE_NPSVC, &napi->state);
+ /* If we set this bit but see that it has already been set,
+ * that indicates that napi has been disabled and we need
+ * to abort this operation
+ */
+ if (test_and_set_bit(NAPI_STATE_NPSVC, &napi->state))
+ goto out;
work = napi->poll(napi, budget);
WARN_ONCE(work > budget, "%pF exceeded budget in poll\n", napi->poll);
@@ -159,6 +164,7 @@ static int poll_one_napi(struct napi_struct *napi, int budget)
clear_bit(NAPI_STATE_NPSVC, &napi->state);
+out:
return budget - work;
}
@@ -380,6 +386,8 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
static atomic_t ip_ident;
struct ipv6hdr *ip6h;
+ WARN_ON_ONCE(!irqs_disabled());
+
udp_len = len + sizeof(*udph);
if (np->ipv6)
ip_len = udp_len + sizeof(*ip6h);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 1cbd20919..de8d5cc5e 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -273,7 +273,6 @@ struct pktgen_dev {
/* runtime counters relating to clone_skb */
- __u64 allocated_skbs;
__u32 clone_count;
int last_ok; /* Was last skb sent?
* Or a failed transmit of some sort?
@@ -2279,7 +2278,7 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
static inline void set_pkt_overhead(struct pktgen_dev *pkt_dev)
{
- pkt_dev->pkt_overhead = 0;
+ pkt_dev->pkt_overhead = LL_RESERVED_SPACE(pkt_dev->odev);
pkt_dev->pkt_overhead += pkt_dev->nr_labels*sizeof(u32);
pkt_dev->pkt_overhead += VLAN_TAG_SIZE(pkt_dev);
pkt_dev->pkt_overhead += SVLAN_TAG_SIZE(pkt_dev);
@@ -2788,6 +2787,7 @@ static struct sk_buff *pktgen_alloc_skb(struct net_device *dev,
} else {
skb = __netdev_alloc_skb(dev, size, GFP_NOWAIT);
}
+ skb_reserve(skb, LL_RESERVED_SPACE(dev));
return skb;
}
@@ -3397,7 +3397,6 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
return;
}
pkt_dev->last_pkt_size = pkt_dev->skb->len;
- pkt_dev->allocated_skbs++;
pkt_dev->clone_count = 0; /* reset counter */
}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 0861018be..0ec48403e 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -678,6 +678,12 @@ int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics)
continue;
if (nla_put_string(skb, i + 1, name))
goto nla_put_failure;
+ } else if (i == RTAX_FEATURES - 1) {
+ u32 user_features = metrics[i] & RTAX_FEATURE_MASK;
+
+ BUILD_BUG_ON(RTAX_FEATURE_MASK & DST_FEATURE_MASK);
+ if (nla_put_u32(skb, i + 1, user_features))
+ goto nla_put_failure;
} else {
if (nla_put_u32(skb, i + 1, metrics[i]))
goto nla_put_failure;
@@ -896,7 +902,9 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ rtnl_link_get_size(dev) /* IFLA_LINKINFO */
+ rtnl_link_get_af_size(dev) /* IFLA_AF_SPEC */
+ nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_PORT_ID */
- + nla_total_size(MAX_PHYS_ITEM_ID_LEN); /* IFLA_PHYS_SWITCH_ID */
+ + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_SWITCH_ID */
+ + nla_total_size(1); /* IFLA_PROTO_DOWN */
+
}
static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev)
@@ -1082,7 +1090,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
(dev->ifalias &&
nla_put_string(skb, IFLA_IFALIAS, dev->ifalias)) ||
nla_put_u32(skb, IFLA_CARRIER_CHANGES,
- atomic_read(&dev->carrier_changes)))
+ atomic_read(&dev->carrier_changes)) ||
+ nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down))
goto nla_put_failure;
if (1) {
@@ -1319,6 +1328,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_CARRIER_CHANGES] = { .type = NLA_U32 }, /* ignored */
[IFLA_PHYS_SWITCH_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN },
[IFLA_LINK_NETNSID] = { .type = NLA_S32 },
+ [IFLA_PROTO_DOWN] = { .type = NLA_U8 },
};
static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@ -1861,6 +1871,14 @@ static int do_setlink(const struct sk_buff *skb,
}
err = 0;
+ if (tb[IFLA_PROTO_DOWN]) {
+ err = dev_change_proto_down(dev,
+ nla_get_u8(tb[IFLA_PROTO_DOWN]));
+ if (err)
+ goto errout;
+ status |= DO_SETLINK_NOTIFY;
+ }
+
errout:
if (status & DO_SETLINK_MODIFIED) {
if (status & DO_SETLINK_NOTIFY)
@@ -1951,16 +1969,30 @@ static int rtnl_group_dellink(const struct net *net, int group)
return 0;
}
+int rtnl_delete_link(struct net_device *dev)
+{
+ const struct rtnl_link_ops *ops;
+ LIST_HEAD(list_kill);
+
+ ops = dev->rtnl_link_ops;
+ if (!ops || !ops->dellink)
+ return -EOPNOTSUPP;
+
+ ops->dellink(dev, &list_kill);
+ unregister_netdevice_many(&list_kill);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(rtnl_delete_link);
+
static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
- const struct rtnl_link_ops *ops;
struct net_device *dev;
struct ifinfomsg *ifm;
char ifname[IFNAMSIZ];
struct nlattr *tb[IFLA_MAX+1];
int err;
- LIST_HEAD(list_kill);
err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
if (err < 0)
@@ -1982,13 +2014,7 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh)
if (!dev)
return -ENODEV;
- ops = dev->rtnl_link_ops;
- if (!ops || !ops->dellink)
- return -EOPNOTSUPP;
-
- ops->dellink(dev, &list_kill);
- unregister_netdevice_many(&list_kill);
- return 0;
+ return rtnl_delete_link(dev);
}
int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7bfa18746..fab4599ba 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -392,7 +392,7 @@ EXPORT_SYMBOL(napi_alloc_frag);
/**
* __netdev_alloc_skb - allocate an skbuff for rx on a specific device
* @dev: network device to receive on
- * @length: length to allocate
+ * @len: length to allocate
* @gfp_mask: get_free_pages mask, passed to alloc_skb
*
* Allocate a new &sk_buff and assign it a usage count of one. The
@@ -461,7 +461,7 @@ EXPORT_SYMBOL(__netdev_alloc_skb);
/**
* __napi_alloc_skb - allocate skbuff for rx in a specific NAPI instance
* @napi: napi instance this buffer was allocated for
- * @length: length to allocate
+ * @len: length to allocate
* @gfp_mask: get_free_pages mask, passed to alloc_skb and alloc_pages
*
* Allocate a new sk_buff for use in NAPI receive. This buffer will
diff --git a/net/core/sock.c b/net/core/sock.c
index 193901d09..3307c0224 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2078,7 +2078,7 @@ suppress_allocation:
EXPORT_SYMBOL(__sk_mem_schedule);
/**
- * __sk_reclaim - reclaim memory_allocated
+ * __sk_mem_reclaim - reclaim memory_allocated
* @sk: socket
* @amount: number of bytes (rounded down to a SK_MEM_QUANTUM multiple)
*/
@@ -2740,10 +2740,8 @@ static void req_prot_cleanup(struct request_sock_ops *rsk_prot)
return;
kfree(rsk_prot->slab_name);
rsk_prot->slab_name = NULL;
- if (rsk_prot->slab) {
- kmem_cache_destroy(rsk_prot->slab);
- rsk_prot->slab = NULL;
- }
+ kmem_cache_destroy(rsk_prot->slab);
+ rsk_prot->slab = NULL;
}
static int req_prot_init(const struct proto *prot)
@@ -2828,10 +2826,8 @@ void proto_unregister(struct proto *prot)
list_del(&prot->node);
mutex_unlock(&proto_list_mutex);
- if (prot->slab != NULL) {
- kmem_cache_destroy(prot->slab);
- prot->slab = NULL;
- }
+ kmem_cache_destroy(prot->slab);
+ prot->slab = NULL;
req_prot_cleanup(prot->rsk_prot);
diff --git a/net/core/timestamping.c b/net/core/timestamping.c
index 43d3dd62f..42689d5c4 100644
--- a/net/core/timestamping.c
+++ b/net/core/timestamping.c
@@ -60,11 +60,15 @@ bool skb_defer_rx_timestamp(struct sk_buff *skb)
struct phy_device *phydev;
unsigned int type;
+ if (!skb->dev || !skb->dev->phydev || !skb->dev->phydev->drv)
+ return false;
+
if (skb_headroom(skb) < ETH_HLEN)
return false;
+
__skb_push(skb, ETH_HLEN);
- type = classify(skb);
+ type = ptp_classify_raw(skb);
__skb_pull(skb, ETH_HLEN);
diff --git a/net/core/utils.c b/net/core/utils.c
index a7732a068..3dffce953 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -301,7 +301,7 @@ out:
EXPORT_SYMBOL(in6_pton);
void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
- __be32 from, __be32 to, int pseudohdr)
+ __be32 from, __be32 to, bool pseudohdr)
{
if (skb->ip_summed != CHECKSUM_PARTIAL) {
csum_replace4(sum, from, to);
@@ -318,7 +318,7 @@ EXPORT_SYMBOL(inet_proto_csum_replace4);
void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb,
const __be32 *from, const __be32 *to,
- int pseudohdr)
+ bool pseudohdr)
{
__be32 diff[] = {
~from[0], ~from[1], ~from[2], ~from[3],
@@ -336,6 +336,19 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb,
}
EXPORT_SYMBOL(inet_proto_csum_replace16);
+void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb,
+ __wsum diff, bool pseudohdr)
+{
+ if (skb->ip_summed != CHECKSUM_PARTIAL) {
+ *sum = csum_fold(csum_add(diff, ~csum_unfold(*sum)));
+ if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr)
+ skb->csum = ~csum_add(diff, ~skb->csum);
+ } else if (pseudohdr) {
+ *sum = ~csum_fold(csum_add(diff, csum_unfold(*sum)));
+ }
+}
+EXPORT_SYMBOL(inet_proto_csum_replace_by_diff);
+
struct __net_random_once_work {
struct work_struct work;
struct static_key *key;
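
utils.c switches the pseudohdr flag of the csum replace helpers to bool and adds inet_proto_csum_replace_by_diff() for callers that already carry a ones-complement delta. A hedged sketch of feeding it such a delta when rewriting one 32-bit field — the surrounding function, the udph parameter and the use of csum_partial()/csum_sub() to build the delta are illustrative assumptions, not taken from this patch:

static void example_rewrite32(struct sk_buff *skb, struct udphdr *udph,
			      __be32 *field, __be32 new_val)
{
	/* delta between the new and old contents of the field */
	__wsum diff = csum_sub(csum_partial(&new_val, sizeof(new_val), 0),
			       csum_partial(field, sizeof(*field), 0));

	*field = new_val;
	/* pseudohdr == true: the rewritten field (e.g. an address) also
	 * feeds the pseudo-header for offloaded checksums
	 */
	inet_proto_csum_replace_by_diff(&udph->check, skb, diff, true);
}
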