diff options
author | André Fabian Silva Delgado <emulatorman@parabola.nu> | 2016-01-20 14:01:31 -0300 |
---|---|---|
committer | André Fabian Silva Delgado <emulatorman@parabola.nu> | 2016-01-20 14:01:31 -0300 |
commit | b4b7ff4b08e691656c9d77c758fc355833128ac0 (patch) | |
tree | 82fcb00e6b918026dc9f2d1f05ed8eee83874cc0 /net/core | |
parent | 35acfa0fc609f2a2cd95cef4a6a9c3a5c38f1778 (diff) |
Linux-libre 4.4-gnupck-4.4-gnu
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/datagram.c | 2 | ||||
-rw-r--r-- | net/core/dev.c | 124 | ||||
-rw-r--r-- | net/core/dst.c | 17 | ||||
-rw-r--r-- | net/core/filter.c | 135 | ||||
-rw-r--r-- | net/core/lwtunnel.c | 4 | ||||
-rw-r--r-- | net/core/neighbour.c | 47 | ||||
-rw-r--r-- | net/core/net-sysfs.c | 11 | ||||
-rw-r--r-- | net/core/netclassid_cgroup.c | 28 | ||||
-rw-r--r-- | net/core/netpoll.c | 23 | ||||
-rw-r--r-- | net/core/netprio_cgroup.c | 9 | ||||
-rw-r--r-- | net/core/ptp_classifier.c | 16 | ||||
-rw-r--r-- | net/core/request_sock.c | 88 | ||||
-rw-r--r-- | net/core/rtnetlink.c | 296 | ||||
-rw-r--r-- | net/core/skbuff.c | 14 | ||||
-rw-r--r-- | net/core/sock.c | 98 | ||||
-rw-r--r-- | net/core/sock_diag.c | 14 | ||||
-rw-r--r-- | net/core/stream.c | 6 | ||||
-rw-r--r-- | net/core/tso.c | 18 | ||||
-rw-r--r-- | net/core/utils.c | 49 |
19 files changed, 607 insertions, 392 deletions
diff --git a/net/core/datagram.c b/net/core/datagram.c index 617088aee..d62af69ad 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -785,7 +785,7 @@ unsigned int datagram_poll(struct file *file, struct socket *sock, if (sock_writeable(sk)) mask |= POLLOUT | POLLWRNORM | POLLWRBAND; else - set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); return mask; } diff --git a/net/core/dev.c b/net/core/dev.c index c14748d05..ae00b894e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2403,17 +2403,20 @@ static void skb_warn_bad_offload(const struct sk_buff *skb) { static const netdev_features_t null_features = 0; struct net_device *dev = skb->dev; - const char *driver = ""; + const char *name = ""; if (!net_ratelimit()) return; - if (dev && dev->dev.parent) - driver = dev_driver_string(dev->dev.parent); - + if (dev) { + if (dev->dev.parent) + name = dev_driver_string(dev->dev.parent); + else + name = netdev_name(dev); + } WARN(1, "%s: caps=(%pNF, %pNF) len=%d data_len=%d gso_size=%d " "gso_type=%d ip_summed=%d\n", - driver, dev ? &dev->features : &null_features, + name, dev ? &dev->features : &null_features, skb->sk ? &skb->sk->sk_route_caps : &null_features, skb->len, skb->data_len, skb_shinfo(skb)->gso_size, skb_shinfo(skb)->gso_type, skb->ip_summed); @@ -2942,9 +2945,11 @@ EXPORT_SYMBOL(xmit_recursion); /** * dev_loopback_xmit - loop back @skb + * @net: network namespace this loopback is happening in + * @sk: sk needed to be a netfilter okfn * @skb: buffer to transmit */ -int dev_loopback_xmit(struct sock *sk, struct sk_buff *skb) +int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) { skb_reset_mac_header(skb); __skb_pull(skb, skb_network_offset(skb)); @@ -2999,6 +3004,7 @@ static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb) new_index = skb_tx_hash(dev, skb); if (queue_index != new_index && sk && + sk_fullsock(sk) && rcu_access_pointer(sk->sk_dst_cache)) sk_tx_queue_set(sk, new_index); @@ -3170,11 +3176,11 @@ out: return rc; } -int dev_queue_xmit_sk(struct sock *sk, struct sk_buff *skb) +int dev_queue_xmit(struct sk_buff *skb) { return __dev_queue_xmit(skb, NULL); } -EXPORT_SYMBOL(dev_queue_xmit_sk); +EXPORT_SYMBOL(dev_queue_xmit); int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv) { @@ -3695,6 +3701,14 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb, case TC_ACT_QUEUED: kfree_skb(skb); return NULL; + case TC_ACT_REDIRECT: + /* skb_mac_header check was done by cls/act_bpf, so + * we can safely push the L2 header back before + * redirecting to another netdev + */ + __skb_push(skb, skb->mac_len); + skb_do_redirect(skb); + return NULL; default: break; } @@ -4009,13 +4023,13 @@ static int netif_receive_skb_internal(struct sk_buff *skb) * NET_RX_SUCCESS: no congestion * NET_RX_DROP: packet was dropped */ -int netif_receive_skb_sk(struct sock *sk, struct sk_buff *skb) +int netif_receive_skb(struct sk_buff *skb) { trace_netif_receive_skb_entry(skb); return netif_receive_skb_internal(skb); } -EXPORT_SYMBOL(netif_receive_skb_sk); +EXPORT_SYMBOL(netif_receive_skb); /* Network device is going away, flush any packets still pending * Called with irqs disabled. @@ -4884,8 +4898,7 @@ struct netdev_adjacent { struct rcu_head rcu; }; -static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev, - struct net_device *adj_dev, +static struct netdev_adjacent *__netdev_find_adj(struct net_device *adj_dev, struct list_head *adj_list) { struct netdev_adjacent *adj; @@ -4911,7 +4924,7 @@ bool netdev_has_upper_dev(struct net_device *dev, { ASSERT_RTNL(); - return __netdev_find_adj(dev, upper_dev, &dev->all_adj_list.upper); + return __netdev_find_adj(upper_dev, &dev->all_adj_list.upper); } EXPORT_SYMBOL(netdev_has_upper_dev); @@ -5173,7 +5186,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev, struct netdev_adjacent *adj; int ret; - adj = __netdev_find_adj(dev, adj_dev, dev_list); + adj = __netdev_find_adj(adj_dev, dev_list); if (adj) { adj->ref_nr++; @@ -5229,7 +5242,7 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev, { struct netdev_adjacent *adj; - adj = __netdev_find_adj(dev, adj_dev, dev_list); + adj = __netdev_find_adj(adj_dev, dev_list); if (!adj) { pr_err("tried to remove device %s from %s\n", @@ -5350,10 +5363,10 @@ static int __netdev_upper_dev_link(struct net_device *dev, return -EBUSY; /* To prevent loops, check if dev is not upper device to upper_dev. */ - if (__netdev_find_adj(upper_dev, dev, &upper_dev->all_adj_list.upper)) + if (__netdev_find_adj(dev, &upper_dev->all_adj_list.upper)) return -EBUSY; - if (__netdev_find_adj(dev, upper_dev, &dev->adj_list.upper)) + if (__netdev_find_adj(upper_dev, &dev->adj_list.upper)) return -EEXIST; if (master && netdev_master_upper_dev_get(dev)) @@ -5363,6 +5376,12 @@ static int __netdev_upper_dev_link(struct net_device *dev, changeupper_info.master = master; changeupper_info.linking = true; + ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev, + &changeupper_info.info); + ret = notifier_to_errno(ret); + if (ret) + return ret; + ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, private, master); if (ret) @@ -5505,6 +5524,9 @@ void netdev_upper_dev_unlink(struct net_device *dev, changeupper_info.master = netdev_master_upper_dev_get(dev) == upper_dev; changeupper_info.linking = false; + call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev, + &changeupper_info.info); + __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev); /* Here is the tricky part. We must remove all dev's lower @@ -5631,7 +5653,7 @@ void *netdev_lower_dev_get_private(struct net_device *dev, if (!lower_dev) return NULL; - lower = __netdev_find_adj(dev, lower_dev, &dev->adj_list.lower); + lower = __netdev_find_adj(lower_dev, &dev->adj_list.lower); if (!lower) return NULL; @@ -6269,6 +6291,48 @@ static void rollback_registered(struct net_device *dev) list_del(&single); } +static netdev_features_t netdev_sync_upper_features(struct net_device *lower, + struct net_device *upper, netdev_features_t features) +{ + netdev_features_t upper_disables = NETIF_F_UPPER_DISABLES; + netdev_features_t feature; + int feature_bit; + + for_each_netdev_feature(&upper_disables, feature_bit) { + feature = __NETIF_F_BIT(feature_bit); + if (!(upper->wanted_features & feature) + && (features & feature)) { + netdev_dbg(lower, "Dropping feature %pNF, upper dev %s has it off.\n", + &feature, upper->name); + features &= ~feature; + } + } + + return features; +} + +static void netdev_sync_lower_features(struct net_device *upper, + struct net_device *lower, netdev_features_t features) +{ + netdev_features_t upper_disables = NETIF_F_UPPER_DISABLES; + netdev_features_t feature; + int feature_bit; + + for_each_netdev_feature(&upper_disables, feature_bit) { + feature = __NETIF_F_BIT(feature_bit); + if (!(features & feature) && (lower->features & feature)) { + netdev_dbg(upper, "Disabling feature %pNF on lower dev %s.\n", + &feature, lower->name); + lower->wanted_features &= ~feature; + netdev_update_features(lower); + + if (unlikely(lower->features & feature)) + netdev_WARN(upper, "failed to disable %pNF on %s!\n", + &feature, lower->name); + } + } +} + static netdev_features_t netdev_fix_features(struct net_device *dev, netdev_features_t features) { @@ -6338,8 +6402,10 @@ static netdev_features_t netdev_fix_features(struct net_device *dev, int __netdev_update_features(struct net_device *dev) { + struct net_device *upper, *lower; netdev_features_t features; - int err = 0; + struct list_head *iter; + int err = -1; ASSERT_RTNL(); @@ -6351,26 +6417,42 @@ int __netdev_update_features(struct net_device *dev) /* driver might be less strict about feature dependencies */ features = netdev_fix_features(dev, features); + /* some features can't be enabled if they're off an an upper device */ + netdev_for_each_upper_dev_rcu(dev, upper, iter) + features = netdev_sync_upper_features(dev, upper, features); + if (dev->features == features) - return 0; + goto sync_lower; netdev_dbg(dev, "Features changed: %pNF -> %pNF\n", &dev->features, &features); if (dev->netdev_ops->ndo_set_features) err = dev->netdev_ops->ndo_set_features(dev, features); + else + err = 0; if (unlikely(err < 0)) { netdev_err(dev, "set_features() failed (%d); wanted %pNF, left %pNF\n", err, &features, &dev->features); + /* return non-0 since some features might have changed and + * it's better to fire a spurious notification than miss it + */ return -1; } +sync_lower: + /* some features must be disabled on lower devices when disabled + * on an upper device (think: bonding master or bridge) + */ + netdev_for_each_lower_dev(dev, lower, iter) + netdev_sync_lower_features(dev, lower, features); + if (!err) dev->features = features; - return 1; + return err < 0 ? 0 : 1; } /** diff --git a/net/core/dst.c b/net/core/dst.c index d6a5a0bc7..a1656e3b8 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -144,12 +144,12 @@ loop: mutex_unlock(&dst_gc_mutex); } -int dst_discard_sk(struct sock *sk, struct sk_buff *skb) +int dst_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb) { kfree_skb(skb); return 0; } -EXPORT_SYMBOL(dst_discard_sk); +EXPORT_SYMBOL(dst_discard_out); const u32 dst_default_metrics[RTAX_MAX + 1] = { /* This initializer is needed to force linker to place this variable @@ -177,7 +177,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops, dst->xfrm = NULL; #endif dst->input = dst_discard; - dst->output = dst_discard_sk; + dst->output = dst_discard_out; dst->error = 0; dst->obsolete = initial_obsolete; dst->header_len = 0; @@ -224,7 +224,7 @@ static void ___dst_free(struct dst_entry *dst) */ if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) { dst->input = dst_discard; - dst->output = dst_discard_sk; + dst->output = dst_discard_out; } dst->obsolete = DST_OBSOLETE_DEAD; } @@ -301,12 +301,13 @@ void dst_release(struct dst_entry *dst) { if (dst) { int newrefcnt; + unsigned short nocache = dst->flags & DST_NOCACHE; newrefcnt = atomic_dec_return(&dst->__refcnt); if (unlikely(newrefcnt < 0)) net_warn_ratelimited("%s: dst:%p refcnt:%d\n", __func__, dst, newrefcnt); - if (!newrefcnt && unlikely(dst->flags & DST_NOCACHE)) + if (!newrefcnt && unlikely(nocache)) call_rcu(&dst->rcu_head, dst_destroy_rcu); } } @@ -352,7 +353,7 @@ static struct dst_ops md_dst_ops = { .family = AF_UNSPEC, }; -static int dst_md_discard_sk(struct sock *sk, struct sk_buff *skb) +static int dst_md_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb) { WARN_ONCE(1, "Attempting to call output on metadata dst\n"); kfree_skb(skb); @@ -375,7 +376,7 @@ static void __metadata_dst_init(struct metadata_dst *md_dst, u8 optslen) DST_METADATA | DST_NOCACHE | DST_NOCOUNT); dst->input = dst_md_discard; - dst->output = dst_md_discard_sk; + dst->output = dst_md_discard_out; memset(dst + 1, 0, sizeof(*md_dst) + optslen - sizeof(*dst)); } @@ -430,7 +431,7 @@ static void dst_ifdown(struct dst_entry *dst, struct net_device *dev, if (!unregister) { dst->input = dst_discard; - dst->output = dst_discard_sk; + dst->output = dst_discard_out; } else { dst->dev = dev_net(dst->dev)->loopback_dev; dev_hold(dst->dev); diff --git a/net/core/filter.c b/net/core/filter.c index bb18c3680..672eefbfb 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -49,16 +49,17 @@ #include <net/sch_generic.h> #include <net/cls_cgroup.h> #include <net/dst_metadata.h> +#include <net/dst.h> /** * sk_filter - run a packet through a socket filter * @sk: sock associated with &sk_buff * @skb: buffer to filter * - * Run the filter code and then cut skb->data to correct size returned by - * SK_RUN_FILTER. If pkt_len is 0 we toss packet. If skb->len is smaller + * Run the eBPF program and then cut skb->data to correct size returned by + * the program. If pkt_len is 0 we toss packet. If skb->len is smaller * than pkt_len we keep whole skb->data. This is the socket level - * wrapper to SK_RUN_FILTER. It returns 0 if the packet should + * wrapper to BPF_PROG_RUN. It returns 0 if the packet should * be accepted or -EPERM if the packet should be tossed. * */ @@ -82,7 +83,7 @@ int sk_filter(struct sock *sk, struct sk_buff *skb) rcu_read_lock(); filter = rcu_dereference(sk->sk_filter); if (filter) { - unsigned int pkt_len = SK_RUN_FILTER(filter, skb); + unsigned int pkt_len = bpf_prog_run_save_cb(filter->prog, skb); err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM; } @@ -148,12 +149,6 @@ static u64 __get_raw_cpu_id(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) return raw_smp_processor_id(); } -/* note that this only generates 32-bit random numbers */ -static u64 __get_random_u32(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) -{ - return prandom_u32(); -} - static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg, struct bpf_insn *insn_buf) { @@ -312,7 +307,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp, *insn = BPF_EMIT_CALL(__get_raw_cpu_id); break; case SKF_AD_OFF + SKF_AD_RANDOM: - *insn = BPF_EMIT_CALL(__get_random_u32); + *insn = BPF_EMIT_CALL(bpf_user_rnd_u32); + bpf_user_rnd_init_once(); break; } break; @@ -1001,7 +997,7 @@ static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp, int err; fp->bpf_func = NULL; - fp->jited = false; + fp->jited = 0; err = bpf_check_classic(fp->insns, fp->len); if (err) { @@ -1083,16 +1079,18 @@ EXPORT_SYMBOL_GPL(bpf_prog_create); * @pfp: the unattached filter that is created * @fprog: the filter program * @trans: post-classic verifier transformation handler + * @save_orig: save classic BPF program * * This function effectively does the same as bpf_prog_create(), only * that it builds up its insns buffer from user space provided buffer. * It also allows for passing a bpf_aux_classic_check_t handler. */ int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog, - bpf_aux_classic_check_t trans) + bpf_aux_classic_check_t trans, bool save_orig) { unsigned int fsize = bpf_classic_proglen(fprog); struct bpf_prog *fp; + int err; /* Make sure new filter is there and in the right amounts. */ if (fprog->filter == NULL) @@ -1108,12 +1106,16 @@ int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog, } fp->len = fprog->len; - /* Since unattached filters are not copied back to user - * space through sk_get_filter(), we do not need to hold - * a copy here, and can spare us the work. - */ fp->orig_prog = NULL; + if (save_orig) { + err = bpf_prog_store_orig_filter(fp, fprog); + if (err) { + __bpf_prog_free(fp); + return -ENOMEM; + } + } + /* bpf_prepare_filter() already takes care of freeing * memory in case something goes wrong. */ @@ -1404,9 +1406,6 @@ static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5) if (unlikely(!dev)) return -EINVAL; - if (unlikely(!(dev->flags & IFF_UP))) - return -EINVAL; - skb2 = skb_clone(skb, GFP_ATOMIC); if (unlikely(!skb2)) return -ENOMEM; @@ -1428,6 +1427,49 @@ const struct bpf_func_proto bpf_clone_redirect_proto = { .arg3_type = ARG_ANYTHING, }; +struct redirect_info { + u32 ifindex; + u32 flags; +}; + +static DEFINE_PER_CPU(struct redirect_info, redirect_info); +static u64 bpf_redirect(u64 ifindex, u64 flags, u64 r3, u64 r4, u64 r5) +{ + struct redirect_info *ri = this_cpu_ptr(&redirect_info); + + ri->ifindex = ifindex; + ri->flags = flags; + return TC_ACT_REDIRECT; +} + +int skb_do_redirect(struct sk_buff *skb) +{ + struct redirect_info *ri = this_cpu_ptr(&redirect_info); + struct net_device *dev; + + dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->ifindex); + ri->ifindex = 0; + if (unlikely(!dev)) { + kfree_skb(skb); + return -EINVAL; + } + + if (BPF_IS_REDIRECT_INGRESS(ri->flags)) + return dev_forward_skb(dev, skb); + + skb->dev = dev; + skb_sender_cpu_clear(skb); + return dev_queue_xmit(skb); +} + +const struct bpf_func_proto bpf_redirect_proto = { + .func = bpf_redirect, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, + .arg2_type = ARG_ANYTHING, +}; + static u64 bpf_get_cgroup_classid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) { return task_get_classid((struct sk_buff *) (unsigned long) r1); @@ -1440,6 +1482,25 @@ static const struct bpf_func_proto bpf_get_cgroup_classid_proto = { .arg1_type = ARG_PTR_TO_CTX, }; +static u64 bpf_get_route_realm(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ +#ifdef CONFIG_IP_ROUTE_CLASSID + const struct dst_entry *dst; + + dst = skb_dst((struct sk_buff *) (unsigned long) r1); + if (dst) + return dst->tclassid; +#endif + return 0; +} + +static const struct bpf_func_proto bpf_get_route_realm_proto = { + .func = bpf_get_route_realm, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +}; + static u64 bpf_skb_vlan_push(u64 r1, u64 r2, u64 vlan_tci, u64 r4, u64 r5) { struct sk_buff *skb = (struct sk_buff *) (long) r1; @@ -1580,7 +1641,8 @@ sk_filter_func_proto(enum bpf_func_id func_id) case BPF_FUNC_ktime_get_ns: return &bpf_ktime_get_ns_proto; case BPF_FUNC_trace_printk: - return bpf_get_trace_printk_proto(); + if (capable(CAP_SYS_ADMIN)) + return bpf_get_trace_printk_proto(); default: return NULL; } @@ -1608,6 +1670,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) return &bpf_skb_get_tunnel_key_proto; case BPF_FUNC_skb_set_tunnel_key: return bpf_get_skb_set_tunnel_key_proto(); + case BPF_FUNC_redirect: + return &bpf_redirect_proto; + case BPF_FUNC_get_route_realm: + return &bpf_get_route_realm_proto; default: return sk_filter_func_proto(func_id); } @@ -1633,6 +1699,9 @@ static bool __is_valid_access(int off, int size, enum bpf_access_type type) static bool sk_filter_is_valid_access(int off, int size, enum bpf_access_type type) { + if (off == offsetof(struct __sk_buff, tc_classid)) + return false; + if (type == BPF_WRITE) { switch (off) { case offsetof(struct __sk_buff, cb[0]) ... @@ -1649,10 +1718,14 @@ static bool sk_filter_is_valid_access(int off, int size, static bool tc_cls_act_is_valid_access(int off, int size, enum bpf_access_type type) { + if (off == offsetof(struct __sk_buff, tc_classid)) + return type == BPF_WRITE ? true : false; + if (type == BPF_WRITE) { switch (off) { case offsetof(struct __sk_buff, mark): case offsetof(struct __sk_buff, tc_index): + case offsetof(struct __sk_buff, priority): case offsetof(struct __sk_buff, cb[0]) ... offsetof(struct __sk_buff, cb[4]): break; @@ -1665,7 +1738,8 @@ static bool tc_cls_act_is_valid_access(int off, int size, static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg, int src_reg, int ctx_off, - struct bpf_insn *insn_buf) + struct bpf_insn *insn_buf, + struct bpf_prog *prog) { struct bpf_insn *insn = insn_buf; @@ -1694,8 +1768,12 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg, case offsetof(struct __sk_buff, priority): BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, priority) != 4); - *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, - offsetof(struct sk_buff, priority)); + if (type == BPF_WRITE) + *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg, + offsetof(struct sk_buff, priority)); + else + *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, + offsetof(struct sk_buff, priority)); break; case offsetof(struct __sk_buff, ingress_ifindex): @@ -1752,6 +1830,7 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg, offsetof(struct __sk_buff, cb[4]): BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20); + prog->cb_access = 1; ctx_off -= offsetof(struct __sk_buff, cb[0]); ctx_off += offsetof(struct sk_buff, cb); ctx_off += offsetof(struct qdisc_skb_cb, data); @@ -1761,6 +1840,14 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg, *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, ctx_off); break; + case offsetof(struct __sk_buff, tc_classid): + ctx_off -= offsetof(struct __sk_buff, tc_classid); + ctx_off += offsetof(struct sk_buff, cb); + ctx_off += offsetof(struct qdisc_skb_cb, tc_classid); + WARN_ON(type != BPF_WRITE); + *insn++ = BPF_STX_MEM(BPF_H, dst_reg, src_reg, ctx_off); + break; + case offsetof(struct __sk_buff, tc_index): #ifdef CONFIG_NET_SCHED BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tc_index) != 2); diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index dfb1a9ca0..299cfc24d 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -180,7 +180,7 @@ int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b) } EXPORT_SYMBOL(lwtunnel_cmp_encap); -int lwtunnel_output(struct sock *sk, struct sk_buff *skb) +int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); const struct lwtunnel_encap_ops *ops; @@ -199,7 +199,7 @@ int lwtunnel_output(struct sock *sk, struct sk_buff *skb) rcu_read_lock(); ops = rcu_dereference(lwtun_encaps[lwtstate->type]); if (likely(ops && ops->output)) - ret = ops->output(sk, skb); + ret = ops->output(net, sk, skb); rcu_read_unlock(); if (ret == -EOPNOTSUPP) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index c169bba44..f18ae91b6 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -857,7 +857,7 @@ static void neigh_probe(struct neighbour *neigh) struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue); /* keep skb alive even if arp_queue overflows */ if (skb) - skb = skb_copy(skb, GFP_ATOMIC); + skb = skb_clone(skb, GFP_ATOMIC); write_unlock(&neigh->lock); neigh->ops->solicit(neigh, skb); atomic_inc(&neigh->probes); @@ -2235,14 +2235,53 @@ static void neigh_update_notify(struct neighbour *neigh) __neigh_notify(neigh, RTM_NEWNEIGH, 0); } +static bool neigh_master_filtered(struct net_device *dev, int master_idx) +{ + struct net_device *master; + + if (!master_idx) + return false; + + master = netdev_master_upper_dev_get(dev); + if (!master || master->ifindex != master_idx) + return true; + + return false; +} + +static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx) +{ + if (filter_idx && dev->ifindex != filter_idx) + return true; + + return false; +} + static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); + const struct nlmsghdr *nlh = cb->nlh; + struct nlattr *tb[NDA_MAX + 1]; struct neighbour *n; int rc, h, s_h = cb->args[1]; int idx, s_idx = idx = cb->args[2]; struct neigh_hash_table *nht; + int filter_master_idx = 0, filter_idx = 0; + unsigned int flags = NLM_F_MULTI; + int err; + + err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX, NULL); + if (!err) { + if (tb[NDA_IFINDEX]) + filter_idx = nla_get_u32(tb[NDA_IFINDEX]); + + if (tb[NDA_MASTER]) + filter_master_idx = nla_get_u32(tb[NDA_MASTER]); + + if (filter_idx || filter_master_idx) + flags |= NLM_F_DUMP_FILTERED; + } rcu_read_lock_bh(); nht = rcu_dereference_bh(tbl->nht); @@ -2255,12 +2294,16 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, n = rcu_dereference_bh(n->next)) { if (!net_eq(dev_net(n->dev), net)) continue; + if (neigh_ifindex_filtered(n->dev, filter_idx)) + continue; + if (neigh_master_filtered(n->dev, filter_master_idx)) + continue; if (idx < s_idx) goto next; if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGH, - NLM_F_MULTI) < 0) { + flags) < 0) { rc = -1; goto out; } diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 830f8a7c1..f88a62ab0 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -471,7 +471,7 @@ static ssize_t phys_switch_id_show(struct device *dev, if (dev_isalive(netdev)) { struct switchdev_attr attr = { - .id = SWITCHDEV_ATTR_PORT_PARENT_ID, + .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, .flags = SWITCHDEV_F_NO_RECURSE, }; @@ -1003,15 +1003,12 @@ static ssize_t show_trans_timeout(struct netdev_queue *queue, } #ifdef CONFIG_XPS -static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue) +static unsigned int get_netdev_queue_index(struct netdev_queue *queue) { struct net_device *dev = queue->dev; - int i; - - for (i = 0; i < dev->num_tx_queues; i++) - if (queue == &dev->_tx[i]) - break; + unsigned int i; + i = queue - dev->_tx; BUG_ON(i >= dev->num_tx_queues); return i; diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index 6441f47b1..d9ee8d08a 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -56,7 +56,7 @@ static void cgrp_css_free(struct cgroup_subsys_state *css) kfree(css_cls_state(css)); } -static int update_classid(const void *v, struct file *file, unsigned n) +static int update_classid_sock(const void *v, struct file *file, unsigned n) { int err; struct socket *sock = sock_from_file(file, &err); @@ -67,18 +67,27 @@ static int update_classid(const void *v, struct file *file, unsigned n) return 0; } -static void cgrp_attach(struct cgroup_subsys_state *css, - struct cgroup_taskset *tset) +static void update_classid(struct cgroup_subsys_state *css, void *v) { - struct cgroup_cls_state *cs = css_cls_state(css); - void *v = (void *)(unsigned long)cs->classid; + struct css_task_iter it; struct task_struct *p; - cgroup_taskset_for_each(p, tset) { + css_task_iter_start(css, &it); + while ((p = css_task_iter_next(&it))) { task_lock(p); - iterate_fd(p->files, 0, update_classid, v); + iterate_fd(p->files, 0, update_classid_sock, v); task_unlock(p); } + css_task_iter_end(&it); +} + +static void cgrp_attach(struct cgroup_taskset *tset) +{ + struct cgroup_subsys_state *css; + + cgroup_taskset_first(tset, &css); + update_classid(css, + (void *)(unsigned long)css_cls_state(css)->classid); } static u64 read_classid(struct cgroup_subsys_state *css, struct cftype *cft) @@ -89,8 +98,11 @@ static u64 read_classid(struct cgroup_subsys_state *css, struct cftype *cft) static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft, u64 value) { - css_cls_state(css)->classid = (u32) value; + struct cgroup_cls_state *cs = css_cls_state(css); + + cs->classid = (u32)value; + update_classid(css, (void *)(unsigned long)cs->classid); return 0; } diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 8bdada242..94acfc89a 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -140,7 +140,7 @@ static void queue_process(struct work_struct *work) * case. Further, we test the poll_owner to avoid recursion on UP * systems where the lock doesn't exist. */ -static int poll_one_napi(struct napi_struct *napi, int budget) +static void poll_one_napi(struct napi_struct *napi) { int work = 0; @@ -149,33 +149,33 @@ static int poll_one_napi(struct napi_struct *napi, int budget) * holding the napi->poll_lock. */ if (!test_bit(NAPI_STATE_SCHED, &napi->state)) - return budget; + return; /* If we set this bit but see that it has already been set, * that indicates that napi has been disabled and we need * to abort this operation */ if (test_and_set_bit(NAPI_STATE_NPSVC, &napi->state)) - goto out; + return; - work = napi->poll(napi, budget); - WARN_ONCE(work > budget, "%pF exceeded budget in poll\n", napi->poll); + /* We explicilty pass the polling call a budget of 0 to + * indicate that we are clearing the Tx path only. + */ + work = napi->poll(napi, 0); + WARN_ONCE(work, "%pF exceeded budget in poll\n", napi->poll); trace_napi_poll(napi); clear_bit(NAPI_STATE_NPSVC, &napi->state); - -out: - return budget - work; } -static void poll_napi(struct net_device *dev, int budget) +static void poll_napi(struct net_device *dev) { struct napi_struct *napi; list_for_each_entry(napi, &dev->napi_list, dev_list) { if (napi->poll_owner != smp_processor_id() && spin_trylock(&napi->poll_lock)) { - budget = poll_one_napi(napi, budget); + poll_one_napi(napi); spin_unlock(&napi->poll_lock); } } @@ -185,7 +185,6 @@ static void netpoll_poll_dev(struct net_device *dev) { const struct net_device_ops *ops; struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo); - int budget = 0; /* Don't do any rx activity if the dev_lock mutex is held * the dev_open/close paths use this to block netpoll activity @@ -208,7 +207,7 @@ static void netpoll_poll_dev(struct net_device *dev) /* Process pending work on NIC */ ops->ndo_poll_controller(dev); - poll_napi(dev, budget); + poll_napi(dev); up(&ni->dev_lock); diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index cbd0a199b..40fd09fe0 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -218,13 +218,14 @@ static int update_netprio(const void *v, struct file *file, unsigned n) return 0; } -static void net_prio_attach(struct cgroup_subsys_state *css, - struct cgroup_taskset *tset) +static void net_prio_attach(struct cgroup_taskset *tset) { struct task_struct *p; - void *v = (void *)(unsigned long)css->cgroup->id; + struct cgroup_subsys_state *css; + + cgroup_taskset_for_each(p, css, tset) { + void *v = (void *)(unsigned long)css->cgroup->id; - cgroup_taskset_for_each(p, tset) { task_lock(p); iterate_fd(p->files, 0, update_netprio, v); task_unlock(p); diff --git a/net/core/ptp_classifier.c b/net/core/ptp_classifier.c index 4eab4a94a..703cf76aa 100644 --- a/net/core/ptp_classifier.c +++ b/net/core/ptp_classifier.c @@ -58,7 +58,7 @@ * jneq #0x0, drop_ieee1588 ; for PTP_GEN_BIT and drop these * ldh [18] ; reload payload * and #0xf ; mask PTP_CLASS_VMASK - * or #0x70 ; PTP_CLASS_VLAN|PTP_CLASS_L2 + * or #0xc0 ; PTP_CLASS_VLAN|PTP_CLASS_L2 * ret a ; return PTP class * * ; PTP over UDP over IPv4 over 802.1Q over Ethernet @@ -73,7 +73,7 @@ * jneq #319, drop_8021q_ipv4 ; is port PTP_EV_PORT ? * ldh [x + 26] ; load payload * and #0xf ; mask PTP_CLASS_VMASK - * or #0x50 ; PTP_CLASS_VLAN|PTP_CLASS_IPV4 + * or #0x90 ; PTP_CLASS_VLAN|PTP_CLASS_IPV4 * ret a ; return PTP class * drop_8021q_ipv4: ret #0x0 ; PTP_CLASS_NONE * @@ -86,7 +86,7 @@ * jneq #319, drop_8021q_ipv6 ; is port PTP_EV_PORT ? * ldh [66] ; load payload * and #0xf ; mask PTP_CLASS_VMASK - * or #0x60 ; PTP_CLASS_VLAN|PTP_CLASS_IPV6 + * or #0xa0 ; PTP_CLASS_VLAN|PTP_CLASS_IPV6 * ret a ; return PTP class * drop_8021q_ipv6: ret #0x0 ; PTP_CLASS_NONE * @@ -98,7 +98,7 @@ * jneq #0x0, drop_ieee1588 ; for PTP_GEN_BIT and drop these * ldh [14] ; reload payload * and #0xf ; mask PTP_CLASS_VMASK - * or #0x30 ; PTP_CLASS_L2 + * or #0x40 ; PTP_CLASS_L2 * ret a ; return PTP class * drop_ieee1588: ret #0x0 ; PTP_CLASS_NONE */ @@ -150,7 +150,7 @@ void __init ptp_classifier_init(void) { 0x15, 0, 35, 0x00000000 }, { 0x28, 0, 0, 0x00000012 }, { 0x54, 0, 0, 0x0000000f }, - { 0x44, 0, 0, 0x00000070 }, + { 0x44, 0, 0, 0x000000c0 }, { 0x16, 0, 0, 0x00000000 }, { 0x15, 0, 12, 0x00000800 }, { 0x30, 0, 0, 0x0000001b }, @@ -162,7 +162,7 @@ void __init ptp_classifier_init(void) { 0x15, 0, 4, 0x0000013f }, { 0x48, 0, 0, 0x0000001a }, { 0x54, 0, 0, 0x0000000f }, - { 0x44, 0, 0, 0x00000050 }, + { 0x44, 0, 0, 0x00000090 }, { 0x16, 0, 0, 0x00000000 }, { 0x06, 0, 0, 0x00000000 }, { 0x15, 0, 8, 0x000086dd }, @@ -172,7 +172,7 @@ void __init ptp_classifier_init(void) { 0x15, 0, 4, 0x0000013f }, { 0x28, 0, 0, 0x00000042 }, { 0x54, 0, 0, 0x0000000f }, - { 0x44, 0, 0, 0x00000060 }, + { 0x44, 0, 0, 0x000000a0 }, { 0x16, 0, 0, 0x00000000 }, { 0x06, 0, 0, 0x00000000 }, { 0x15, 0, 7, 0x000088f7 }, @@ -181,7 +181,7 @@ void __init ptp_classifier_init(void) { 0x15, 0, 4, 0x00000000 }, { 0x28, 0, 0, 0x0000000e }, { 0x54, 0, 0, 0x0000000f }, - { 0x44, 0, 0, 0x00000030 }, + { 0x44, 0, 0, 0x00000040 }, { 0x16, 0, 0, 0x00000000 }, { 0x06, 0, 0, 0x00000000 }, }; diff --git a/net/core/request_sock.c b/net/core/request_sock.c index b42f0e26f..5d26056b6 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -37,90 +37,16 @@ int sysctl_max_syn_backlog = 256; EXPORT_SYMBOL(sysctl_max_syn_backlog); -int reqsk_queue_alloc(struct request_sock_queue *queue, - unsigned int nr_table_entries) +void reqsk_queue_alloc(struct request_sock_queue *queue) { - size_t lopt_size = sizeof(struct listen_sock); - struct listen_sock *lopt = NULL; + spin_lock_init(&queue->rskq_lock); - nr_table_entries = min_t(u32, nr_table_entries, sysctl_max_syn_backlog); - nr_table_entries = max_t(u32, nr_table_entries, 8); - nr_table_entries = roundup_pow_of_two(nr_table_entries + 1); - lopt_size += nr_table_entries * sizeof(struct request_sock *); + spin_lock_init(&queue->fastopenq.lock); + queue->fastopenq.rskq_rst_head = NULL; + queue->fastopenq.rskq_rst_tail = NULL; + queue->fastopenq.qlen = 0; - if (lopt_size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) - lopt = kzalloc(lopt_size, GFP_KERNEL | - __GFP_NOWARN | - __GFP_NORETRY); - if (!lopt) - lopt = vzalloc(lopt_size); - if (!lopt) - return -ENOMEM; - - get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd)); - spin_lock_init(&queue->syn_wait_lock); queue->rskq_accept_head = NULL; - lopt->nr_table_entries = nr_table_entries; - lopt->max_qlen_log = ilog2(nr_table_entries); - - spin_lock_bh(&queue->syn_wait_lock); - queue->listen_opt = lopt; - spin_unlock_bh(&queue->syn_wait_lock); - - return 0; -} - -void __reqsk_queue_destroy(struct request_sock_queue *queue) -{ - /* This is an error recovery path only, no locking needed */ - kvfree(queue->listen_opt); -} - -static inline struct listen_sock *reqsk_queue_yank_listen_sk( - struct request_sock_queue *queue) -{ - struct listen_sock *lopt; - - spin_lock_bh(&queue->syn_wait_lock); - lopt = queue->listen_opt; - queue->listen_opt = NULL; - spin_unlock_bh(&queue->syn_wait_lock); - - return lopt; -} - -void reqsk_queue_destroy(struct request_sock_queue *queue) -{ - /* make all the listen_opt local to us */ - struct listen_sock *lopt = reqsk_queue_yank_listen_sk(queue); - - if (listen_sock_qlen(lopt) != 0) { - unsigned int i; - - for (i = 0; i < lopt->nr_table_entries; i++) { - struct request_sock *req; - - spin_lock_bh(&queue->syn_wait_lock); - while ((req = lopt->syn_table[i]) != NULL) { - lopt->syn_table[i] = req->dl_next; - /* Because of following del_timer_sync(), - * we must release the spinlock here - * or risk a dead lock. - */ - spin_unlock_bh(&queue->syn_wait_lock); - atomic_inc(&lopt->qlen_dec); - if (del_timer_sync(&req->rsk_timer)) - reqsk_put(req); - reqsk_put(req); - spin_lock_bh(&queue->syn_wait_lock); - } - spin_unlock_bh(&queue->syn_wait_lock); - } - } - - if (WARN_ON(listen_sock_qlen(lopt) != 0)) - pr_err("qlen %u\n", listen_sock_qlen(lopt)); - kvfree(lopt); } /* @@ -174,7 +100,7 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req, struct sock *lsk = req->rsk_listener; struct fastopen_queue *fastopenq; - fastopenq = inet_csk(lsk)->icsk_accept_queue.fastopenq; + fastopenq = &inet_csk(lsk)->icsk_accept_queue.fastopenq; tcp_sk(sk)->fastopen_rsk = NULL; spin_lock_bh(&fastopenq->lock); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 0ec48403e..34ba7a088 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -96,7 +96,7 @@ int rtnl_is_locked(void) EXPORT_SYMBOL(rtnl_is_locked); #ifdef CONFIG_PROVE_LOCKING -int lockdep_rtnl_is_held(void) +bool lockdep_rtnl_is_held(void) { return lockdep_is_held(&rtnl_mutex); } @@ -497,7 +497,8 @@ void rtnl_af_unregister(struct rtnl_af_ops *ops) } EXPORT_SYMBOL_GPL(rtnl_af_unregister); -static size_t rtnl_link_get_af_size(const struct net_device *dev) +static size_t rtnl_link_get_af_size(const struct net_device *dev, + u32 ext_filter_mask) { struct rtnl_af_ops *af_ops; size_t size; @@ -509,7 +510,7 @@ static size_t rtnl_link_get_af_size(const struct net_device *dev) if (af_ops->get_link_af_size) { /* AF_* + nested data */ size += nla_total_size(sizeof(struct nlattr)) + - af_ops->get_link_af_size(dev); + af_ops->get_link_af_size(dev, ext_filter_mask); } } @@ -837,7 +838,8 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev, /* IFLA_VF_STATS_BROADCAST */ nla_total_size(sizeof(__u64)) + /* IFLA_VF_STATS_MULTICAST */ - nla_total_size(sizeof(__u64))); + nla_total_size(sizeof(__u64)) + + nla_total_size(sizeof(struct ifla_vf_trust))); return size; } else return 0; @@ -900,7 +902,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */ + rtnl_port_size(dev, ext_filter_mask) /* IFLA_VF_PORTS + IFLA_PORT_SELF */ + rtnl_link_get_size(dev) /* IFLA_LINKINFO */ - + rtnl_link_get_af_size(dev) /* IFLA_AF_SPEC */ + + rtnl_link_get_af_size(dev, ext_filter_mask) /* IFLA_AF_SPEC */ + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_PORT_ID */ + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_SWITCH_ID */ + nla_total_size(1); /* IFLA_PROTO_DOWN */ @@ -1025,7 +1027,7 @@ static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev) { int err; struct switchdev_attr attr = { - .id = SWITCHDEV_ATTR_PORT_PARENT_ID, + .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, .flags = SWITCHDEV_F_NO_RECURSE, }; @@ -1043,15 +1045,156 @@ static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev) return 0; } +static noinline_for_stack int rtnl_fill_stats(struct sk_buff *skb, + struct net_device *dev) +{ + const struct rtnl_link_stats64 *stats; + struct rtnl_link_stats64 temp; + struct nlattr *attr; + + stats = dev_get_stats(dev, &temp); + + attr = nla_reserve(skb, IFLA_STATS, + sizeof(struct rtnl_link_stats)); + if (!attr) + return -EMSGSIZE; + + copy_rtnl_link_stats(nla_data(attr), stats); + + attr = nla_reserve(skb, IFLA_STATS64, + sizeof(struct rtnl_link_stats64)); + if (!attr) + return -EMSGSIZE; + + copy_rtnl_link_stats64(nla_data(attr), stats); + + return 0; +} + +static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, + struct net_device *dev, + int vfs_num, + struct nlattr *vfinfo) +{ + struct ifla_vf_rss_query_en vf_rss_query_en; + struct ifla_vf_link_state vf_linkstate; + struct ifla_vf_spoofchk vf_spoofchk; + struct ifla_vf_tx_rate vf_tx_rate; + struct ifla_vf_stats vf_stats; + struct ifla_vf_trust vf_trust; + struct ifla_vf_vlan vf_vlan; + struct ifla_vf_rate vf_rate; + struct nlattr *vf, *vfstats; + struct ifla_vf_mac vf_mac; + struct ifla_vf_info ivi; + + /* Not all SR-IOV capable drivers support the + * spoofcheck and "RSS query enable" query. Preset to + * -1 so the user space tool can detect that the driver + * didn't report anything. + */ + ivi.spoofchk = -1; + ivi.rss_query_en = -1; + ivi.trusted = -1; + memset(ivi.mac, 0, sizeof(ivi.mac)); + /* The default value for VF link state is "auto" + * IFLA_VF_LINK_STATE_AUTO which equals zero + */ + ivi.linkstate = 0; + if (dev->netdev_ops->ndo_get_vf_config(dev, vfs_num, &ivi)) + return 0; + + vf_mac.vf = + vf_vlan.vf = + vf_rate.vf = + vf_tx_rate.vf = + vf_spoofchk.vf = + vf_linkstate.vf = + vf_rss_query_en.vf = + vf_trust.vf = ivi.vf; + + memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac)); + vf_vlan.vlan = ivi.vlan; + vf_vlan.qos = ivi.qos; + vf_tx_rate.rate = ivi.max_tx_rate; + vf_rate.min_tx_rate = ivi.min_tx_rate; + vf_rate.max_tx_rate = ivi.max_tx_rate; + vf_spoofchk.setting = ivi.spoofchk; + vf_linkstate.link_state = ivi.linkstate; + vf_rss_query_en.setting = ivi.rss_query_en; + vf_trust.setting = ivi.trusted; + vf = nla_nest_start(skb, IFLA_VF_INFO); + if (!vf) { + nla_nest_cancel(skb, vfinfo); + return -EMSGSIZE; + } + if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) || + nla_put(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan) || + nla_put(skb, IFLA_VF_RATE, sizeof(vf_rate), + &vf_rate) || + nla_put(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate), + &vf_tx_rate) || + nla_put(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk), + &vf_spoofchk) || + nla_put(skb, IFLA_VF_LINK_STATE, sizeof(vf_linkstate), + &vf_linkstate) || + nla_put(skb, IFLA_VF_RSS_QUERY_EN, + sizeof(vf_rss_query_en), + &vf_rss_query_en) || + nla_put(skb, IFLA_VF_TRUST, + sizeof(vf_trust), &vf_trust)) + return -EMSGSIZE; + memset(&vf_stats, 0, sizeof(vf_stats)); + if (dev->netdev_ops->ndo_get_vf_stats) + dev->netdev_ops->ndo_get_vf_stats(dev, vfs_num, + &vf_stats); + vfstats = nla_nest_start(skb, IFLA_VF_STATS); + if (!vfstats) { + nla_nest_cancel(skb, vf); + nla_nest_cancel(skb, vfinfo); + return -EMSGSIZE; + } + if (nla_put_u64(skb, IFLA_VF_STATS_RX_PACKETS, + vf_stats.rx_packets) || + nla_put_u64(skb, IFLA_VF_STATS_TX_PACKETS, + vf_stats.tx_packets) || + nla_put_u64(skb, IFLA_VF_STATS_RX_BYTES, + vf_stats.rx_bytes) || + nla_put_u64(skb, IFLA_VF_STATS_TX_BYTES, + vf_stats.tx_bytes) || + nla_put_u64(skb, IFLA_VF_STATS_BROADCAST, + vf_stats.broadcast) || + nla_put_u64(skb, IFLA_VF_STATS_MULTICAST, + vf_stats.multicast)) + return -EMSGSIZE; + nla_nest_end(skb, vfstats); + nla_nest_end(skb, vf); + return 0; +} + +static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev) +{ + struct rtnl_link_ifmap map = { + .mem_start = dev->mem_start, + .mem_end = dev->mem_end, + .base_addr = dev->base_addr, + .irq = dev->irq, + .dma = dev->dma, + .port = dev->if_port, + }; + if (nla_put(skb, IFLA_MAP, sizeof(map), &map)) + return -EMSGSIZE; + + return 0; +} + static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, int type, u32 pid, u32 seq, u32 change, unsigned int flags, u32 ext_filter_mask) { struct ifinfomsg *ifm; struct nlmsghdr *nlh; - struct rtnl_link_stats64 temp; - const struct rtnl_link_stats64 *stats; - struct nlattr *attr, *af_spec; + struct nlattr *af_spec; struct rtnl_af_ops *af_ops; struct net_device *upper_dev = netdev_master_upper_dev_get(dev); @@ -1094,18 +1237,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down)) goto nla_put_failure; - if (1) { - struct rtnl_link_ifmap map = { - .mem_start = dev->mem_start, - .mem_end = dev->mem_end, - .base_addr = dev->base_addr, - .irq = dev->irq, - .dma = dev->dma, - .port = dev->if_port, - }; - if (nla_put(skb, IFLA_MAP, sizeof(map), &map)) - goto nla_put_failure; - } + if (rtnl_fill_link_ifmap(skb, dev)) + goto nla_put_failure; if (dev->addr_len) { if (nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr) || @@ -1122,122 +1255,27 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, if (rtnl_phys_switch_id_fill(skb, dev)) goto nla_put_failure; - attr = nla_reserve(skb, IFLA_STATS, - sizeof(struct rtnl_link_stats)); - if (attr == NULL) + if (rtnl_fill_stats(skb, dev)) goto nla_put_failure; - stats = dev_get_stats(dev, &temp); - copy_rtnl_link_stats(nla_data(attr), stats); - - attr = nla_reserve(skb, IFLA_STATS64, - sizeof(struct rtnl_link_stats64)); - if (attr == NULL) - goto nla_put_failure; - copy_rtnl_link_stats64(nla_data(attr), stats); - if (dev->dev.parent && (ext_filter_mask & RTEXT_FILTER_VF) && nla_put_u32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent))) goto nla_put_failure; - if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent - && (ext_filter_mask & RTEXT_FILTER_VF)) { + if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent && + ext_filter_mask & RTEXT_FILTER_VF) { int i; - - struct nlattr *vfinfo, *vf, *vfstats; + struct nlattr *vfinfo; int num_vfs = dev_num_vf(dev->dev.parent); vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST); if (!vfinfo) goto nla_put_failure; for (i = 0; i < num_vfs; i++) { - struct ifla_vf_info ivi; - struct ifla_vf_mac vf_mac; - struct ifla_vf_vlan vf_vlan; - struct ifla_vf_rate vf_rate; - struct ifla_vf_tx_rate vf_tx_rate; - struct ifla_vf_spoofchk vf_spoofchk; - struct ifla_vf_link_state vf_linkstate; - struct ifla_vf_rss_query_en vf_rss_query_en; - struct ifla_vf_stats vf_stats; - - /* - * Not all SR-IOV capable drivers support the - * spoofcheck and "RSS query enable" query. Preset to - * -1 so the user space tool can detect that the driver - * didn't report anything. - */ - ivi.spoofchk = -1; - ivi.rss_query_en = -1; - memset(ivi.mac, 0, sizeof(ivi.mac)); - /* The default value for VF link state is "auto" - * IFLA_VF_LINK_STATE_AUTO which equals zero - */ - ivi.linkstate = 0; - if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi)) - break; - vf_mac.vf = - vf_vlan.vf = - vf_rate.vf = - vf_tx_rate.vf = - vf_spoofchk.vf = - vf_linkstate.vf = - vf_rss_query_en.vf = ivi.vf; - - memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac)); - vf_vlan.vlan = ivi.vlan; - vf_vlan.qos = ivi.qos; - vf_tx_rate.rate = ivi.max_tx_rate; - vf_rate.min_tx_rate = ivi.min_tx_rate; - vf_rate.max_tx_rate = ivi.max_tx_rate; - vf_spoofchk.setting = ivi.spoofchk; - vf_linkstate.link_state = ivi.linkstate; - vf_rss_query_en.setting = ivi.rss_query_en; - vf = nla_nest_start(skb, IFLA_VF_INFO); - if (!vf) { - nla_nest_cancel(skb, vfinfo); - goto nla_put_failure; - } - if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) || - nla_put(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan) || - nla_put(skb, IFLA_VF_RATE, sizeof(vf_rate), - &vf_rate) || - nla_put(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate), - &vf_tx_rate) || - nla_put(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk), - &vf_spoofchk) || - nla_put(skb, IFLA_VF_LINK_STATE, sizeof(vf_linkstate), - &vf_linkstate) || - nla_put(skb, IFLA_VF_RSS_QUERY_EN, - sizeof(vf_rss_query_en), - &vf_rss_query_en)) - goto nla_put_failure; - memset(&vf_stats, 0, sizeof(vf_stats)); - if (dev->netdev_ops->ndo_get_vf_stats) - dev->netdev_ops->ndo_get_vf_stats(dev, i, - &vf_stats); - vfstats = nla_nest_start(skb, IFLA_VF_STATS); - if (!vfstats) { - nla_nest_cancel(skb, vf); - nla_nest_cancel(skb, vfinfo); - goto nla_put_failure; - } - if (nla_put_u64(skb, IFLA_VF_STATS_RX_PACKETS, - vf_stats.rx_packets) || - nla_put_u64(skb, IFLA_VF_STATS_TX_PACKETS, - vf_stats.tx_packets) || - nla_put_u64(skb, IFLA_VF_STATS_RX_BYTES, - vf_stats.rx_bytes) || - nla_put_u64(skb, IFLA_VF_STATS_TX_BYTES, - vf_stats.tx_bytes) || - nla_put_u64(skb, IFLA_VF_STATS_BROADCAST, - vf_stats.broadcast) || - nla_put_u64(skb, IFLA_VF_STATS_MULTICAST, - vf_stats.multicast)) + if (rtnl_fill_vfinfo(skb, dev, i, vfinfo)) goto nla_put_failure; - nla_nest_end(skb, vfstats); - nla_nest_end(skb, vf); } + nla_nest_end(skb, vfinfo); } @@ -1272,7 +1310,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, if (!(af = nla_nest_start(skb, af_ops->family))) goto nla_put_failure; - err = af_ops->fill_link_af(skb, dev); + err = af_ops->fill_link_af(skb, dev, ext_filter_mask); /* * Caller may return ENODATA to indicate that there @@ -1347,6 +1385,7 @@ static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = { [IFLA_VF_LINK_STATE] = { .len = sizeof(struct ifla_vf_link_state) }, [IFLA_VF_RSS_QUERY_EN] = { .len = sizeof(struct ifla_vf_rss_query_en) }, [IFLA_VF_STATS] = { .type = NLA_NESTED }, + [IFLA_VF_TRUST] = { .len = sizeof(struct ifla_vf_trust) }, }; static const struct nla_policy ifla_vf_stats_policy[IFLA_VF_STATS_MAX + 1] = { @@ -1586,6 +1625,16 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) return err; } + if (tb[IFLA_VF_TRUST]) { + struct ifla_vf_trust *ivt = nla_data(tb[IFLA_VF_TRUST]); + + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_trust) + err = ops->ndo_set_vf_trust(dev, ivt->vf, ivt->setting); + if (err < 0) + return err; + } + return err; } @@ -3443,4 +3492,3 @@ void __init rtnetlink_init(void) rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL, NULL); rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, NULL); } - diff --git a/net/core/skbuff.c b/net/core/skbuff.c index fab4599ba..b2df375ec 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -414,7 +414,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len, len += NET_SKB_PAD; if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) || - (gfp_mask & (__GFP_WAIT | GFP_DMA))) { + (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) { skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE); if (!skb) goto skb_fail; @@ -481,7 +481,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, len += NET_SKB_PAD + NET_IP_ALIGN; if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) || - (gfp_mask & (__GFP_WAIT | GFP_DMA))) { + (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) { skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE); if (!skb) goto skb_fail; @@ -3643,7 +3643,8 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb, serr->ee.ee_info = tstype; if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) { serr->ee.ee_data = skb_shinfo(skb)->tskey; - if (sk->sk_protocol == IPPROTO_TCP) + if (sk->sk_protocol == IPPROTO_TCP && + sk->sk_type == SOCK_STREAM) serr->ee.ee_data -= sk->sk_tskey; } @@ -4268,7 +4269,8 @@ static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb) return NULL; } - memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN); + memmove(skb->data - ETH_HLEN, skb->data - skb->mac_len - VLAN_HLEN, + 2 * ETH_ALEN); skb->mac_header += VLAN_HLEN; return skb; } @@ -4452,7 +4454,7 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len, return NULL; gfp_head = gfp_mask; - if (gfp_head & __GFP_WAIT) + if (gfp_head & __GFP_DIRECT_RECLAIM) gfp_head |= __GFP_REPEAT; *errcode = -ENOBUFS; @@ -4467,7 +4469,7 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len, while (order) { if (npages >= 1 << order) { - page = alloc_pages((gfp_mask & ~__GFP_WAIT) | + page = alloc_pages((gfp_mask & ~__GFP_DIRECT_RECLAIM) | __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY, diff --git a/net/core/sock.c b/net/core/sock.c index 3307c0224..0d91f7dca 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -422,13 +422,23 @@ static void sock_warn_obsolete_bsdism(const char *name) } } -#define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)) +static bool sock_needs_netstamp(const struct sock *sk) +{ + switch (sk->sk_family) { + case AF_UNSPEC: + case AF_UNIX: + return false; + default: + return true; + } +} static void sock_disable_timestamp(struct sock *sk, unsigned long flags) { if (sk->sk_flags & flags) { sk->sk_flags &= ~flags; - if (!(sk->sk_flags & SK_FLAGS_TIMESTAMP)) + if (sock_needs_netstamp(sk) && + !(sk->sk_flags & SK_FLAGS_TIMESTAMP)) net_disable_timestamp(); } } @@ -862,7 +872,8 @@ set_rcvbuf: if (val & SOF_TIMESTAMPING_OPT_ID && !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) { - if (sk->sk_protocol == IPPROTO_TCP) { + if (sk->sk_protocol == IPPROTO_TCP && + sk->sk_type == SOCK_STREAM) { if (sk->sk_state != TCP_ESTABLISHED) { ret = -EINVAL; break; @@ -988,6 +999,10 @@ set_rcvbuf: sk->sk_max_pacing_rate); break; + case SO_INCOMING_CPU: + sk->sk_incoming_cpu = val; + break; + default: ret = -ENOPROTOOPT; break; @@ -1514,7 +1529,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) skb_queue_head_init(&newsk->sk_receive_queue); skb_queue_head_init(&newsk->sk_write_queue); - spin_lock_init(&newsk->sk_dst_lock); rwlock_init(&newsk->sk_callback_lock); lockdep_set_class_and_name(&newsk->sk_callback_lock, af_callback_keys + newsk->sk_family, @@ -1537,7 +1551,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) */ is_charged = sk_filter_charge(newsk, filter); - if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk))) { + if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) { /* It is still raw copy of parent, so invalidate * destructor and make plain sk_free() */ newsk->sk_destruct = NULL; @@ -1578,7 +1592,8 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) if (newsk->sk_prot->sockets_allocated) sk_sockets_allocated_inc(newsk); - if (newsk->sk_flags & SK_FLAGS_TIMESTAMP) + if (sock_needs_netstamp(sk) && + newsk->sk_flags & SK_FLAGS_TIMESTAMP) net_enable_timestamp(); } out: @@ -1590,7 +1605,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst) { u32 max_segs = 1; - __sk_dst_set(sk, dst); + sk_dst_set(sk, dst); sk->sk_route_caps = dst->dev->features; if (sk->sk_route_caps & NETIF_F_GSO) sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE; @@ -1639,6 +1654,28 @@ void sock_wfree(struct sk_buff *skb) } EXPORT_SYMBOL(sock_wfree); +void skb_set_owner_w(struct sk_buff *skb, struct sock *sk) +{ + skb_orphan(skb); + skb->sk = sk; +#ifdef CONFIG_INET + if (unlikely(!sk_fullsock(sk))) { + skb->destructor = sock_edemux; + sock_hold(sk); + return; + } +#endif + skb->destructor = sock_wfree; + skb_set_hash_from_sk(skb, sk); + /* + * We used to take a refcount on sk, but following operation + * is enough to guarantee sk_free() wont free this sock until + * all in-flight packets are completed + */ + atomic_add(skb->truesize, &sk->sk_wmem_alloc); +} +EXPORT_SYMBOL(skb_set_owner_w); + void skb_orphan_partial(struct sk_buff *skb) { /* TCP stack sets skb->ooo_okay based on sk_wmem_alloc, @@ -1776,7 +1813,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo) { DEFINE_WAIT(wait); - clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); for (;;) { if (!timeo) break; @@ -1822,7 +1859,7 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, if (sk_wmem_alloc_get(sk) < sk->sk_sndbuf) break; - set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); err = -EAGAIN; if (!timeo) @@ -1852,6 +1889,32 @@ struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, } EXPORT_SYMBOL(sock_alloc_send_skb); +int sock_cmsg_send(struct sock *sk, struct msghdr *msg, + struct sockcm_cookie *sockc) +{ + struct cmsghdr *cmsg; + + for_each_cmsghdr(cmsg, msg) { + if (!CMSG_OK(msg, cmsg)) + return -EINVAL; + if (cmsg->cmsg_level != SOL_SOCKET) + continue; + switch (cmsg->cmsg_type) { + case SO_MARK: + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32))) + return -EINVAL; + sockc->mark = *(u32 *)CMSG_DATA(cmsg); + break; + default: + return -EINVAL; + } + } + return 0; +} +EXPORT_SYMBOL(sock_cmsg_send); + /* On 32bit arches, an skb frag is limited to 2^15 */ #define SKB_FRAG_PAGE_ORDER get_order(32768) @@ -1879,8 +1942,10 @@ bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp) pfrag->offset = 0; if (SKB_FRAG_PAGE_ORDER) { - pfrag->page = alloc_pages((gfp & ~__GFP_WAIT) | __GFP_COMP | - __GFP_NOWARN | __GFP_NORETRY, + /* Avoid direct reclaim but allow kswapd to wake */ + pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) | + __GFP_COMP | __GFP_NOWARN | + __GFP_NORETRY, SKB_FRAG_PAGE_ORDER); if (likely(pfrag->page)) { pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER; @@ -1981,9 +2046,9 @@ int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb) DEFINE_WAIT(wait); prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb); - clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); finish_wait(sk_sleep(sk), &wait); return rc; } @@ -2321,7 +2386,6 @@ void sock_init_data(struct socket *sock, struct sock *sk) } else sk->sk_wq = NULL; - spin_lock_init(&sk->sk_dst_lock); rwlock_init(&sk->sk_callback_lock); lockdep_set_class_and_name(&sk->sk_callback_lock, af_callback_keys + sk->sk_family, @@ -2353,6 +2417,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_max_pacing_rate = ~0U; sk->sk_pacing_rate = ~0U; + sk->sk_incoming_cpu = -1; /* * Before updating sk_refcnt, we must commit prior changes to memory * (Documentation/RCU/rculist_nulls.txt for details) @@ -2479,7 +2544,8 @@ void sock_enable_timestamp(struct sock *sk, int flag) * time stamping, but time stamping might have been on * already because of the other one */ - if (!(previous_flags & SK_FLAGS_TIMESTAMP)) + if (sock_needs_netstamp(sk) && + !(previous_flags & SK_FLAGS_TIMESTAMP)) net_enable_timestamp(); } } @@ -2758,7 +2824,7 @@ static int req_prot_init(const struct proto *prot) rsk_prot->slab = kmem_cache_create(rsk_prot->slab_name, rsk_prot->obj_size, 0, - 0, NULL); + prot->slab_flags, NULL); if (!rsk_prot->slab) { pr_crit("%s: Can't create request sock SLAB cache!\n", diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 817622f3d..0c1d58d43 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -1,3 +1,5 @@ +/* License: GPL */ + #include <linux/mutex.h> #include <linux/socket.h> #include <linux/skbuff.h> @@ -323,14 +325,4 @@ static int __init sock_diag_init(void) BUG_ON(!broadcast_wq); return register_pernet_subsys(&diag_net_ops); } - -static void __exit sock_diag_exit(void) -{ - unregister_pernet_subsys(&diag_net_ops); - destroy_workqueue(broadcast_wq); -} - -module_init(sock_diag_init); -module_exit(sock_diag_exit); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_SOCK_DIAG); +device_initcall(sock_diag_init); diff --git a/net/core/stream.c b/net/core/stream.c index d70f77a0c..b96f7a79e 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -39,7 +39,7 @@ void sk_stream_write_space(struct sock *sk) wake_up_interruptible_poll(&wq->wait, POLLOUT | POLLWRNORM | POLLWRBAND); if (wq && wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) - sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT); + sock_wake_async(wq, SOCK_WAKE_SPACE, POLL_OUT); rcu_read_unlock(); } } @@ -126,7 +126,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) current_timeo = vm_wait = (prandom_u32() % (HZ / 5)) + 2; while (1) { - set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); @@ -139,7 +139,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) } if (signal_pending(current)) goto do_interrupted; - clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); if (sk_stream_memory_free(sk) && !vm_wait) break; diff --git a/net/core/tso.c b/net/core/tso.c index 630b30b4f..5dca7ce8e 100644 --- a/net/core/tso.c +++ b/net/core/tso.c @@ -1,4 +1,5 @@ #include <linux/export.h> +#include <linux/if_vlan.h> #include <net/ip.h> #include <net/tso.h> #include <asm/unaligned.h> @@ -14,18 +15,24 @@ EXPORT_SYMBOL(tso_count_descs); void tso_build_hdr(struct sk_buff *skb, char *hdr, struct tso_t *tso, int size, bool is_last) { - struct iphdr *iph; struct tcphdr *tcph; int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); int mac_hdr_len = skb_network_offset(skb); memcpy(hdr, skb->data, hdr_len); - iph = (struct iphdr *)(hdr + mac_hdr_len); - iph->id = htons(tso->ip_id); - iph->tot_len = htons(size + hdr_len - mac_hdr_len); + if (!tso->ipv6) { + struct iphdr *iph = (void *)(hdr + mac_hdr_len); + + iph->id = htons(tso->ip_id); + iph->tot_len = htons(size + hdr_len - mac_hdr_len); + tso->ip_id++; + } else { + struct ipv6hdr *iph = (void *)(hdr + mac_hdr_len); + + iph->payload_len = htons(size + tcp_hdrlen(skb)); + } tcph = (struct tcphdr *)(hdr + skb_transport_offset(skb)); put_unaligned_be32(tso->tcp_seq, &tcph->seq); - tso->ip_id++; if (!is_last) { /* Clear all special flags for not last packet */ @@ -61,6 +68,7 @@ void tso_start(struct sk_buff *skb, struct tso_t *tso) tso->ip_id = ntohs(ip_hdr(skb)->id); tso->tcp_seq = ntohl(tcp_hdr(skb)->seq); tso->next_frag_idx = 0; + tso->ipv6 = vlan_get_protocol(skb) == htons(ETH_P_IPV6); /* Build first data */ tso->size = skb_headlen(skb) - hdr_len; diff --git a/net/core/utils.c b/net/core/utils.c index 3dffce953..3d17ca8b4 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -348,52 +348,3 @@ void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb, } } EXPORT_SYMBOL(inet_proto_csum_replace_by_diff); - -struct __net_random_once_work { - struct work_struct work; - struct static_key *key; -}; - -static void __net_random_once_deferred(struct work_struct *w) -{ - struct __net_random_once_work *work = - container_of(w, struct __net_random_once_work, work); - BUG_ON(!static_key_enabled(work->key)); - static_key_slow_dec(work->key); - kfree(work); -} - -static void __net_random_once_disable_jump(struct static_key *key) -{ - struct __net_random_once_work *w; - - w = kmalloc(sizeof(*w), GFP_ATOMIC); - if (!w) - return; - - INIT_WORK(&w->work, __net_random_once_deferred); - w->key = key; - schedule_work(&w->work); -} - -bool __net_get_random_once(void *buf, int nbytes, bool *done, - struct static_key *once_key) -{ - static DEFINE_SPINLOCK(lock); - unsigned long flags; - - spin_lock_irqsave(&lock, flags); - if (*done) { - spin_unlock_irqrestore(&lock, flags); - return false; - } - - get_random_bytes(buf, nbytes); - *done = true; - spin_unlock_irqrestore(&lock, flags); - - __net_random_once_disable_jump(once_key); - - return true; -} -EXPORT_SYMBOL(__net_get_random_once); |