summaryrefslogtreecommitdiff
path: root/net/core
diff options
context:
space:
mode:
authorAndré Fabian Silva Delgado <emulatorman@parabola.nu>2016-09-11 04:34:46 -0300
committerAndré Fabian Silva Delgado <emulatorman@parabola.nu>2016-09-11 04:34:46 -0300
commit863981e96738983919de841ec669e157e6bdaeb0 (patch)
treed6d89a12e7eb8017837c057935a2271290907f76 /net/core
parent8dec7c70575785729a6a9e6719a955e9c545bcab (diff)
Linux-libre 4.7.1-gnupck-4.7.1-gnu
Diffstat (limited to 'net/core')
-rw-r--r--net/core/datagram.c9
-rw-r--r--net/core/dev.c112
-rw-r--r--net/core/devlink.c1050
-rw-r--r--net/core/ethtool.c29
-rw-r--r--net/core/fib_rules.c4
-rw-r--r--net/core/filter.c211
-rw-r--r--net/core/gen_stats.c37
-rw-r--r--net/core/neighbour.c22
-rw-r--r--net/core/net-procfs.c3
-rw-r--r--net/core/net-sysfs.c1
-rw-r--r--net/core/pktgen.c9
-rw-r--r--net/core/rtnetlink.c264
-rw-r--r--net/core/skbuff.c272
-rw-r--r--net/core/sock.c161
-rw-r--r--net/core/sock_diag.c3
-rw-r--r--net/core/sysctl_net_core.c9
16 files changed, 1967 insertions, 229 deletions
diff --git a/net/core/datagram.c b/net/core/datagram.c
index fa9dc6450..b7de71f8d 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -301,16 +301,19 @@ void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
}
EXPORT_SYMBOL(skb_free_datagram);
-void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb)
+void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len)
{
bool slow;
if (likely(atomic_read(&skb->users) == 1))
smp_rmb();
- else if (likely(!atomic_dec_and_test(&skb->users)))
+ else if (likely(!atomic_dec_and_test(&skb->users))) {
+ sk_peek_offset_bwd(sk, len);
return;
+ }
slow = lock_sock_fast(sk);
+ sk_peek_offset_bwd(sk, len);
skb_orphan(skb);
sk_mem_reclaim_partial(sk);
unlock_sock_fast(sk, slow);
@@ -318,7 +321,7 @@ void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb)
/* skb is now orphaned, can be freed outside of locked section */
__kfree_skb(skb);
}
-EXPORT_SYMBOL(skb_free_datagram_locked);
+EXPORT_SYMBOL(__skb_free_datagram_locked);
/**
* skb_kill_datagram - Free a datagram skbuff forcibly
diff --git a/net/core/dev.c b/net/core/dev.c
index 5c925ac50..904ff431d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1741,7 +1741,7 @@ static inline void net_timestamp_set(struct sk_buff *skb)
__net_timestamp(SKB); \
} \
-bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb)
+bool is_skb_forwardable(const struct net_device *dev, const struct sk_buff *skb)
{
unsigned int len;
@@ -1850,7 +1850,7 @@ static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
* taps currently in use.
*/
-static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
+void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
{
struct packet_type *ptype;
struct sk_buff *skb2 = NULL;
@@ -1907,6 +1907,7 @@ out_unlock:
pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
rcu_read_unlock();
}
+EXPORT_SYMBOL_GPL(dev_queue_xmit_nit);
/**
* netif_setup_tc - Handle tc mappings on real_num_tx_queues change
@@ -2711,6 +2712,19 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
return ERR_PTR(err);
}
+ /* Only report GSO partial support if it will enable us to
+ * support segmentation on this frame without needing additional
+ * work.
+ */
+ if (features & NETIF_F_GSO_PARTIAL) {
+ netdev_features_t partial_features = NETIF_F_GSO_ROBUST;
+ struct net_device *dev = skb->dev;
+
+ partial_features |= dev->features & dev->gso_partial_features;
+ if (!skb_gso_ok(skb, features | partial_features))
+ features &= ~NETIF_F_GSO_PARTIAL;
+ }
+
BUILD_BUG_ON(SKB_SGO_CB_OFFSET +
sizeof(*SKB_GSO_CB(skb)) > sizeof(skb->cb));
@@ -2825,14 +2839,45 @@ static netdev_features_t dflt_features_check(const struct sk_buff *skb,
return vlan_features_check(skb, features);
}
+static netdev_features_t gso_features_check(const struct sk_buff *skb,
+ struct net_device *dev,
+ netdev_features_t features)
+{
+ u16 gso_segs = skb_shinfo(skb)->gso_segs;
+
+ if (gso_segs > dev->gso_max_segs)
+ return features & ~NETIF_F_GSO_MASK;
+
+ /* Support for GSO partial features requires software
+ * intervention before we can actually process the packets
+ * so we need to strip support for any partial features now
+ * and we can pull them back in after we have partially
+ * segmented the frame.
+ */
+ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL))
+ features &= ~dev->gso_partial_features;
+
+ /* Make sure to clear the IPv4 ID mangling feature if the
+ * IPv4 header has the potential to be fragmented.
+ */
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) {
+ struct iphdr *iph = skb->encapsulation ?
+ inner_ip_hdr(skb) : ip_hdr(skb);
+
+ if (!(iph->frag_off & htons(IP_DF)))
+ features &= ~NETIF_F_TSO_MANGLEID;
+ }
+
+ return features;
+}
+
netdev_features_t netif_skb_features(struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
netdev_features_t features = dev->features;
- u16 gso_segs = skb_shinfo(skb)->gso_segs;
- if (gso_segs > dev->gso_max_segs || gso_segs < dev->gso_min_segs)
- features &= ~NETIF_F_GSO_MASK;
+ if (skb_is_gso(skb))
+ features = gso_features_check(skb, dev, features);
/* If encapsulation offload request, verify we are testing
* hardware encapsulation features instead of standard
@@ -2915,9 +2960,6 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
{
netdev_features_t features;
- if (skb->next)
- return skb;
-
features = netif_skb_features(skb);
skb = validate_xmit_vlan(skb, features);
if (unlikely(!skb))
@@ -2960,6 +3002,7 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
out_kfree_skb:
kfree_skb(skb);
out_null:
+ atomic_long_inc(&dev->tx_dropped);
return NULL;
}
@@ -3143,12 +3186,12 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
case TC_ACT_SHOT:
qdisc_qstats_cpu_drop(cl->q);
*ret = NET_XMIT_DROP;
- goto drop;
+ kfree_skb(skb);
+ return NULL;
case TC_ACT_STOLEN:
case TC_ACT_QUEUED:
*ret = NET_XMIT_SUCCESS;
-drop:
- kfree_skb(skb);
+ consume_skb(skb);
return NULL;
case TC_ACT_REDIRECT:
/* No need to push/pop skb's mac_header here on egress! */
@@ -3349,7 +3392,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
skb = validate_xmit_skb(skb, dev);
if (!skb)
- goto drop;
+ goto out;
HARD_TX_LOCK(dev, txq, cpu);
@@ -3376,7 +3419,6 @@ recursion_alert:
}
rc = -ENETDOWN;
-drop:
rcu_read_unlock_bh();
atomic_long_inc(&dev->tx_dropped);
@@ -3428,6 +3470,7 @@ u32 rps_cpu_mask __read_mostly;
EXPORT_SYMBOL(rps_cpu_mask);
struct static_key rps_needed __read_mostly;
+EXPORT_SYMBOL(rps_needed);
static struct rps_dev_flow *
set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
@@ -3914,9 +3957,11 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
break;
case TC_ACT_SHOT:
qdisc_qstats_cpu_drop(cl->q);
+ kfree_skb(skb);
+ return NULL;
case TC_ACT_STOLEN:
case TC_ACT_QUEUED:
- kfree_skb(skb);
+ consume_skb(skb);
return NULL;
case TC_ACT_REDIRECT:
/* skb_mac_header check was done by cls/act_bpf, so
@@ -4440,6 +4485,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
NAPI_GRO_CB(skb)->free = 0;
NAPI_GRO_CB(skb)->encap_mark = 0;
NAPI_GRO_CB(skb)->is_fou = 0;
+ NAPI_GRO_CB(skb)->is_atomic = 1;
NAPI_GRO_CB(skb)->gro_remcsum_start = 0;
/* Setup for GRO checksum validation */
@@ -4664,6 +4710,8 @@ static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
if (unlikely(skb_gro_header_hard(skb, hlen))) {
eth = skb_gro_header_slow(skb, hlen, 0);
if (unlikely(!eth)) {
+ net_warn_ratelimited("%s: dropping impossible skb from %s\n",
+ __func__, napi->dev->name);
napi_reuse_skb(napi, skb);
return NULL;
}
@@ -4938,8 +4986,8 @@ bool sk_busy_loop(struct sock *sk, int nonblock)
netpoll_poll_unlock(have);
}
if (rc > 0)
- NET_ADD_STATS_BH(sock_net(sk),
- LINUX_MIB_BUSYPOLLRXPACKETS, rc);
+ __NET_ADD_STATS(sock_net(sk),
+ LINUX_MIB_BUSYPOLLRXPACKETS, rc);
local_bh_enable();
if (rc == LL_FLUSH_FAILED)
@@ -6676,6 +6724,10 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
features &= ~NETIF_F_TSO6;
}
+ /* TSO with IPv4 ID mangling requires IPv4 TSO be enabled */
+ if ((features & NETIF_F_TSO_MANGLEID) && !(features & NETIF_F_TSO))
+ features &= ~NETIF_F_TSO_MANGLEID;
+
/* TSO ECN requires that TSO is present as well. */
if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN)
features &= ~NETIF_F_TSO_ECN;
@@ -6704,6 +6756,14 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
}
}
+ /* GSO partial features require GSO partial be set */
+ if ((features & dev->gso_partial_features) &&
+ !(features & NETIF_F_GSO_PARTIAL)) {
+ netdev_dbg(dev,
+ "Dropping partially supported GSO features since no GSO partial.\n");
+ features &= ~dev->gso_partial_features;
+ }
+
#ifdef CONFIG_NET_RX_BUSY_POLL
if (dev->netdev_ops->ndo_busy_poll)
features |= NETIF_F_BUSY_POLL;
@@ -6974,9 +7034,22 @@ int register_netdevice(struct net_device *dev)
dev->features |= NETIF_F_SOFT_FEATURES;
dev->wanted_features = dev->features & dev->hw_features;
- if (!(dev->flags & IFF_LOOPBACK)) {
+ if (!(dev->flags & IFF_LOOPBACK))
dev->hw_features |= NETIF_F_NOCACHE_COPY;
- }
+
+ /* If IPv4 TCP segmentation offload is supported we should also
+ * allow the device to enable segmenting the frame with the option
+ * of ignoring a static IP ID value. This doesn't enable the
+ * feature itself but allows the user to enable it later.
+ */
+ if (dev->hw_features & NETIF_F_TSO)
+ dev->hw_features |= NETIF_F_TSO_MANGLEID;
+ if (dev->vlan_features & NETIF_F_TSO)
+ dev->vlan_features |= NETIF_F_TSO_MANGLEID;
+ if (dev->mpls_features & NETIF_F_TSO)
+ dev->mpls_features |= NETIF_F_TSO_MANGLEID;
+ if (dev->hw_enc_features & NETIF_F_TSO)
+ dev->hw_enc_features |= NETIF_F_TSO_MANGLEID;
/* Make NETIF_F_HIGHDMA inheritable to VLAN devices.
*/
@@ -6984,7 +7057,7 @@ int register_netdevice(struct net_device *dev)
/* Make NETIF_F_SG inheritable to tunnel devices.
*/
- dev->hw_enc_features |= NETIF_F_SG;
+ dev->hw_enc_features |= NETIF_F_SG | NETIF_F_GSO_PARTIAL;
/* Make NETIF_F_SG inheritable to MPLS.
*/
@@ -7427,7 +7500,6 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev->gso_max_size = GSO_MAX_SIZE;
dev->gso_max_segs = GSO_MAX_SEGS;
- dev->gso_min_segs = 0;
INIT_LIST_HEAD(&dev->napi_list);
INIT_LIST_HEAD(&dev->unreg_list);
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 590fa561c..933e8d4d3 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -119,7 +119,171 @@ static struct devlink_port *devlink_port_get_from_info(struct devlink *devlink,
return devlink_port_get_from_attrs(devlink, info->attrs);
}
-#define DEVLINK_NL_FLAG_NEED_PORT BIT(0)
+struct devlink_sb {
+ struct list_head list;
+ unsigned int index;
+ u32 size;
+ u16 ingress_pools_count;
+ u16 egress_pools_count;
+ u16 ingress_tc_count;
+ u16 egress_tc_count;
+};
+
+static u16 devlink_sb_pool_count(struct devlink_sb *devlink_sb)
+{
+ return devlink_sb->ingress_pools_count + devlink_sb->egress_pools_count;
+}
+
+static struct devlink_sb *devlink_sb_get_by_index(struct devlink *devlink,
+ unsigned int sb_index)
+{
+ struct devlink_sb *devlink_sb;
+
+ list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
+ if (devlink_sb->index == sb_index)
+ return devlink_sb;
+ }
+ return NULL;
+}
+
+static bool devlink_sb_index_exists(struct devlink *devlink,
+ unsigned int sb_index)
+{
+ return devlink_sb_get_by_index(devlink, sb_index);
+}
+
+static struct devlink_sb *devlink_sb_get_from_attrs(struct devlink *devlink,
+ struct nlattr **attrs)
+{
+ if (attrs[DEVLINK_ATTR_SB_INDEX]) {
+ u32 sb_index = nla_get_u32(attrs[DEVLINK_ATTR_SB_INDEX]);
+ struct devlink_sb *devlink_sb;
+
+ devlink_sb = devlink_sb_get_by_index(devlink, sb_index);
+ if (!devlink_sb)
+ return ERR_PTR(-ENODEV);
+ return devlink_sb;
+ }
+ return ERR_PTR(-EINVAL);
+}
+
+static struct devlink_sb *devlink_sb_get_from_info(struct devlink *devlink,
+ struct genl_info *info)
+{
+ return devlink_sb_get_from_attrs(devlink, info->attrs);
+}
+
+static int devlink_sb_pool_index_get_from_attrs(struct devlink_sb *devlink_sb,
+ struct nlattr **attrs,
+ u16 *p_pool_index)
+{
+ u16 val;
+
+ if (!attrs[DEVLINK_ATTR_SB_POOL_INDEX])
+ return -EINVAL;
+
+ val = nla_get_u16(attrs[DEVLINK_ATTR_SB_POOL_INDEX]);
+ if (val >= devlink_sb_pool_count(devlink_sb))
+ return -EINVAL;
+ *p_pool_index = val;
+ return 0;
+}
+
+static int devlink_sb_pool_index_get_from_info(struct devlink_sb *devlink_sb,
+ struct genl_info *info,
+ u16 *p_pool_index)
+{
+ return devlink_sb_pool_index_get_from_attrs(devlink_sb, info->attrs,
+ p_pool_index);
+}
+
+static int
+devlink_sb_pool_type_get_from_attrs(struct nlattr **attrs,
+ enum devlink_sb_pool_type *p_pool_type)
+{
+ u8 val;
+
+ if (!attrs[DEVLINK_ATTR_SB_POOL_TYPE])
+ return -EINVAL;
+
+ val = nla_get_u8(attrs[DEVLINK_ATTR_SB_POOL_TYPE]);
+ if (val != DEVLINK_SB_POOL_TYPE_INGRESS &&
+ val != DEVLINK_SB_POOL_TYPE_EGRESS)
+ return -EINVAL;
+ *p_pool_type = val;
+ return 0;
+}
+
+static int
+devlink_sb_pool_type_get_from_info(struct genl_info *info,
+ enum devlink_sb_pool_type *p_pool_type)
+{
+ return devlink_sb_pool_type_get_from_attrs(info->attrs, p_pool_type);
+}
+
+static int
+devlink_sb_th_type_get_from_attrs(struct nlattr **attrs,
+ enum devlink_sb_threshold_type *p_th_type)
+{
+ u8 val;
+
+ if (!attrs[DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE])
+ return -EINVAL;
+
+ val = nla_get_u8(attrs[DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE]);
+ if (val != DEVLINK_SB_THRESHOLD_TYPE_STATIC &&
+ val != DEVLINK_SB_THRESHOLD_TYPE_DYNAMIC)
+ return -EINVAL;
+ *p_th_type = val;
+ return 0;
+}
+
+static int
+devlink_sb_th_type_get_from_info(struct genl_info *info,
+ enum devlink_sb_threshold_type *p_th_type)
+{
+ return devlink_sb_th_type_get_from_attrs(info->attrs, p_th_type);
+}
+
+static int
+devlink_sb_tc_index_get_from_attrs(struct devlink_sb *devlink_sb,
+ struct nlattr **attrs,
+ enum devlink_sb_pool_type pool_type,
+ u16 *p_tc_index)
+{
+ u16 val;
+
+ if (!attrs[DEVLINK_ATTR_SB_TC_INDEX])
+ return -EINVAL;
+
+ val = nla_get_u16(attrs[DEVLINK_ATTR_SB_TC_INDEX]);
+ if (pool_type == DEVLINK_SB_POOL_TYPE_INGRESS &&
+ val >= devlink_sb->ingress_tc_count)
+ return -EINVAL;
+ if (pool_type == DEVLINK_SB_POOL_TYPE_EGRESS &&
+ val >= devlink_sb->egress_tc_count)
+ return -EINVAL;
+ *p_tc_index = val;
+ return 0;
+}
+
+static int
+devlink_sb_tc_index_get_from_info(struct devlink_sb *devlink_sb,
+ struct genl_info *info,
+ enum devlink_sb_pool_type pool_type,
+ u16 *p_tc_index)
+{
+ return devlink_sb_tc_index_get_from_attrs(devlink_sb, info->attrs,
+ pool_type, p_tc_index);
+}
+
+#define DEVLINK_NL_FLAG_NEED_DEVLINK BIT(0)
+#define DEVLINK_NL_FLAG_NEED_PORT BIT(1)
+#define DEVLINK_NL_FLAG_NEED_SB BIT(2)
+#define DEVLINK_NL_FLAG_LOCK_PORTS BIT(3)
+ /* port is not needed but we need to ensure they don't
+ * change in the middle of command
+ */
static int devlink_nl_pre_doit(const struct genl_ops *ops,
struct sk_buff *skb, struct genl_info *info)
@@ -132,8 +296,9 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops,
mutex_unlock(&devlink_mutex);
return PTR_ERR(devlink);
}
- info->user_ptr[0] = devlink;
- if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT) {
+ if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_DEVLINK) {
+ info->user_ptr[0] = devlink;
+ } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT) {
struct devlink_port *devlink_port;
mutex_lock(&devlink_port_mutex);
@@ -143,7 +308,22 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops,
mutex_unlock(&devlink_mutex);
return PTR_ERR(devlink_port);
}
- info->user_ptr[1] = devlink_port;
+ info->user_ptr[0] = devlink_port;
+ }
+ if (ops->internal_flags & DEVLINK_NL_FLAG_LOCK_PORTS) {
+ mutex_lock(&devlink_port_mutex);
+ }
+ if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_SB) {
+ struct devlink_sb *devlink_sb;
+
+ devlink_sb = devlink_sb_get_from_info(devlink, info);
+ if (IS_ERR(devlink_sb)) {
+ if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT)
+ mutex_unlock(&devlink_port_mutex);
+ mutex_unlock(&devlink_mutex);
+ return PTR_ERR(devlink_sb);
+ }
+ info->user_ptr[1] = devlink_sb;
}
return 0;
}
@@ -151,7 +331,8 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops,
static void devlink_nl_post_doit(const struct genl_ops *ops,
struct sk_buff *skb, struct genl_info *info)
{
- if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT)
+ if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT ||
+ ops->internal_flags & DEVLINK_NL_FLAG_LOCK_PORTS)
mutex_unlock(&devlink_port_mutex);
mutex_unlock(&devlink_mutex);
}
@@ -356,8 +537,8 @@ out:
static int devlink_nl_cmd_port_get_doit(struct sk_buff *skb,
struct genl_info *info)
{
- struct devlink *devlink = info->user_ptr[0];
- struct devlink_port *devlink_port = info->user_ptr[1];
+ struct devlink_port *devlink_port = info->user_ptr[0];
+ struct devlink *devlink = devlink_port->devlink;
struct sk_buff *msg;
int err;
@@ -436,8 +617,8 @@ static int devlink_port_type_set(struct devlink *devlink,
static int devlink_nl_cmd_port_set_doit(struct sk_buff *skb,
struct genl_info *info)
{
- struct devlink *devlink = info->user_ptr[0];
- struct devlink_port *devlink_port = info->user_ptr[1];
+ struct devlink_port *devlink_port = info->user_ptr[0];
+ struct devlink *devlink = devlink_port->devlink;
int err;
if (info->attrs[DEVLINK_ATTR_PORT_TYPE]) {
@@ -497,12 +678,735 @@ static int devlink_nl_cmd_port_unsplit_doit(struct sk_buff *skb,
return devlink_port_unsplit(devlink, port_index);
}
+static int devlink_nl_sb_fill(struct sk_buff *msg, struct devlink *devlink,
+ struct devlink_sb *devlink_sb,
+ enum devlink_command cmd, u32 portid,
+ u32 seq, int flags)
+{
+ void *hdr;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+ if (nla_put_u32(msg, DEVLINK_ATTR_SB_INDEX, devlink_sb->index))
+ goto nla_put_failure;
+ if (nla_put_u32(msg, DEVLINK_ATTR_SB_SIZE, devlink_sb->size))
+ goto nla_put_failure;
+ if (nla_put_u16(msg, DEVLINK_ATTR_SB_INGRESS_POOL_COUNT,
+ devlink_sb->ingress_pools_count))
+ goto nla_put_failure;
+ if (nla_put_u16(msg, DEVLINK_ATTR_SB_EGRESS_POOL_COUNT,
+ devlink_sb->egress_pools_count))
+ goto nla_put_failure;
+ if (nla_put_u16(msg, DEVLINK_ATTR_SB_INGRESS_TC_COUNT,
+ devlink_sb->ingress_tc_count))
+ goto nla_put_failure;
+ if (nla_put_u16(msg, DEVLINK_ATTR_SB_EGRESS_TC_COUNT,
+ devlink_sb->egress_tc_count))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+static int devlink_nl_cmd_sb_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_sb *devlink_sb = info->user_ptr[1];
+ struct sk_buff *msg;
+ int err;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_sb_fill(msg, devlink, devlink_sb,
+ DEVLINK_CMD_SB_NEW,
+ info->snd_portid, info->snd_seq, 0);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ return genlmsg_reply(msg, info);
+}
+
+static int devlink_nl_cmd_sb_get_dumpit(struct sk_buff *msg,
+ struct netlink_callback *cb)
+{
+ struct devlink *devlink;
+ struct devlink_sb *devlink_sb;
+ int start = cb->args[0];
+ int idx = 0;
+ int err;
+
+ mutex_lock(&devlink_mutex);
+ list_for_each_entry(devlink, &devlink_list, list) {
+ if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ continue;
+ list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
+ if (idx < start) {
+ idx++;
+ continue;
+ }
+ err = devlink_nl_sb_fill(msg, devlink, devlink_sb,
+ DEVLINK_CMD_SB_NEW,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI);
+ if (err)
+ goto out;
+ idx++;
+ }
+ }
+out:
+ mutex_unlock(&devlink_mutex);
+
+ cb->args[0] = idx;
+ return msg->len;
+}
+
+static int devlink_nl_sb_pool_fill(struct sk_buff *msg, struct devlink *devlink,
+ struct devlink_sb *devlink_sb,
+ u16 pool_index, enum devlink_command cmd,
+ u32 portid, u32 seq, int flags)
+{
+ struct devlink_sb_pool_info pool_info;
+ void *hdr;
+ int err;
+
+ err = devlink->ops->sb_pool_get(devlink, devlink_sb->index,
+ pool_index, &pool_info);
+ if (err)
+ return err;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+ if (nla_put_u32(msg, DEVLINK_ATTR_SB_INDEX, devlink_sb->index))
+ goto nla_put_failure;
+ if (nla_put_u16(msg, DEVLINK_ATTR_SB_POOL_INDEX, pool_index))
+ goto nla_put_failure;
+ if (nla_put_u8(msg, DEVLINK_ATTR_SB_POOL_TYPE, pool_info.pool_type))
+ goto nla_put_failure;
+ if (nla_put_u32(msg, DEVLINK_ATTR_SB_POOL_SIZE, pool_info.size))
+ goto nla_put_failure;
+ if (nla_put_u8(msg, DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE,
+ pool_info.threshold_type))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+static int devlink_nl_cmd_sb_pool_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_sb *devlink_sb = info->user_ptr[1];
+ struct sk_buff *msg;
+ u16 pool_index;
+ int err;
+
+ err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
+ &pool_index);
+ if (err)
+ return err;
+
+ if (!devlink->ops || !devlink->ops->sb_pool_get)
+ return -EOPNOTSUPP;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_sb_pool_fill(msg, devlink, devlink_sb, pool_index,
+ DEVLINK_CMD_SB_POOL_NEW,
+ info->snd_portid, info->snd_seq, 0);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ return genlmsg_reply(msg, info);
+}
+
+static int __sb_pool_get_dumpit(struct sk_buff *msg, int start, int *p_idx,
+ struct devlink *devlink,
+ struct devlink_sb *devlink_sb,
+ u32 portid, u32 seq)
+{
+ u16 pool_count = devlink_sb_pool_count(devlink_sb);
+ u16 pool_index;
+ int err;
+
+ for (pool_index = 0; pool_index < pool_count; pool_index++) {
+ if (*p_idx < start) {
+ (*p_idx)++;
+ continue;
+ }
+ err = devlink_nl_sb_pool_fill(msg, devlink,
+ devlink_sb,
+ pool_index,
+ DEVLINK_CMD_SB_POOL_NEW,
+ portid, seq, NLM_F_MULTI);
+ if (err)
+ return err;
+ (*p_idx)++;
+ }
+ return 0;
+}
+
+static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg,
+ struct netlink_callback *cb)
+{
+ struct devlink *devlink;
+ struct devlink_sb *devlink_sb;
+ int start = cb->args[0];
+ int idx = 0;
+ int err;
+
+ mutex_lock(&devlink_mutex);
+ list_for_each_entry(devlink, &devlink_list, list) {
+ if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) ||
+ !devlink->ops || !devlink->ops->sb_pool_get)
+ continue;
+ list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
+ err = __sb_pool_get_dumpit(msg, start, &idx, devlink,
+ devlink_sb,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq);
+ if (err && err != -EOPNOTSUPP)
+ goto out;
+ }
+ }
+out:
+ mutex_unlock(&devlink_mutex);
+
+ cb->args[0] = idx;
+ return msg->len;
+}
+
+static int devlink_sb_pool_set(struct devlink *devlink, unsigned int sb_index,
+ u16 pool_index, u32 size,
+ enum devlink_sb_threshold_type threshold_type)
+
+{
+ const struct devlink_ops *ops = devlink->ops;
+
+ if (ops && ops->sb_pool_set)
+ return ops->sb_pool_set(devlink, sb_index, pool_index,
+ size, threshold_type);
+ return -EOPNOTSUPP;
+}
+
+static int devlink_nl_cmd_sb_pool_set_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_sb *devlink_sb = info->user_ptr[1];
+ enum devlink_sb_threshold_type threshold_type;
+ u16 pool_index;
+ u32 size;
+ int err;
+
+ err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
+ &pool_index);
+ if (err)
+ return err;
+
+ err = devlink_sb_th_type_get_from_info(info, &threshold_type);
+ if (err)
+ return err;
+
+ if (!info->attrs[DEVLINK_ATTR_SB_POOL_SIZE])
+ return -EINVAL;
+
+ size = nla_get_u32(info->attrs[DEVLINK_ATTR_SB_POOL_SIZE]);
+ return devlink_sb_pool_set(devlink, devlink_sb->index,
+ pool_index, size, threshold_type);
+}
+
+static int devlink_nl_sb_port_pool_fill(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct devlink_port *devlink_port,
+ struct devlink_sb *devlink_sb,
+ u16 pool_index,
+ enum devlink_command cmd,
+ u32 portid, u32 seq, int flags)
+{
+ const struct devlink_ops *ops = devlink->ops;
+ u32 threshold;
+ void *hdr;
+ int err;
+
+ err = ops->sb_port_pool_get(devlink_port, devlink_sb->index,
+ pool_index, &threshold);
+ if (err)
+ return err;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+ if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index))
+ goto nla_put_failure;
+ if (nla_put_u32(msg, DEVLINK_ATTR_SB_INDEX, devlink_sb->index))
+ goto nla_put_failure;
+ if (nla_put_u16(msg, DEVLINK_ATTR_SB_POOL_INDEX, pool_index))
+ goto nla_put_failure;
+ if (nla_put_u32(msg, DEVLINK_ATTR_SB_THRESHOLD, threshold))
+ goto nla_put_failure;
+
+ if (ops->sb_occ_port_pool_get) {
+ u32 cur;
+ u32 max;
+
+ err = ops->sb_occ_port_pool_get(devlink_port, devlink_sb->index,
+ pool_index, &cur, &max);
+ if (err && err != -EOPNOTSUPP)
+ return err;
+ if (!err) {
+ if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_CUR, cur))
+ goto nla_put_failure;
+ if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_MAX, max))
+ goto nla_put_failure;
+ }
+ }
+
+ genlmsg_end(msg, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+static int devlink_nl_cmd_sb_port_pool_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink_port *devlink_port = info->user_ptr[0];
+ struct devlink *devlink = devlink_port->devlink;
+ struct devlink_sb *devlink_sb = info->user_ptr[1];
+ struct sk_buff *msg;
+ u16 pool_index;
+ int err;
+
+ err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
+ &pool_index);
+ if (err)
+ return err;
+
+ if (!devlink->ops || !devlink->ops->sb_port_pool_get)
+ return -EOPNOTSUPP;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_sb_port_pool_fill(msg, devlink, devlink_port,
+ devlink_sb, pool_index,
+ DEVLINK_CMD_SB_PORT_POOL_NEW,
+ info->snd_portid, info->snd_seq, 0);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ return genlmsg_reply(msg, info);
+}
+
+static int __sb_port_pool_get_dumpit(struct sk_buff *msg, int start, int *p_idx,
+ struct devlink *devlink,
+ struct devlink_sb *devlink_sb,
+ u32 portid, u32 seq)
+{
+ struct devlink_port *devlink_port;
+ u16 pool_count = devlink_sb_pool_count(devlink_sb);
+ u16 pool_index;
+ int err;
+
+ list_for_each_entry(devlink_port, &devlink->port_list, list) {
+ for (pool_index = 0; pool_index < pool_count; pool_index++) {
+ if (*p_idx < start) {
+ (*p_idx)++;
+ continue;
+ }
+ err = devlink_nl_sb_port_pool_fill(msg, devlink,
+ devlink_port,
+ devlink_sb,
+ pool_index,
+ DEVLINK_CMD_SB_PORT_POOL_NEW,
+ portid, seq,
+ NLM_F_MULTI);
+ if (err)
+ return err;
+ (*p_idx)++;
+ }
+ }
+ return 0;
+}
+
+static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg,
+ struct netlink_callback *cb)
+{
+ struct devlink *devlink;
+ struct devlink_sb *devlink_sb;
+ int start = cb->args[0];
+ int idx = 0;
+ int err;
+
+ mutex_lock(&devlink_mutex);
+ mutex_lock(&devlink_port_mutex);
+ list_for_each_entry(devlink, &devlink_list, list) {
+ if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) ||
+ !devlink->ops || !devlink->ops->sb_port_pool_get)
+ continue;
+ list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
+ err = __sb_port_pool_get_dumpit(msg, start, &idx,
+ devlink, devlink_sb,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq);
+ if (err && err != -EOPNOTSUPP)
+ goto out;
+ }
+ }
+out:
+ mutex_unlock(&devlink_port_mutex);
+ mutex_unlock(&devlink_mutex);
+
+ cb->args[0] = idx;
+ return msg->len;
+}
+
+static int devlink_sb_port_pool_set(struct devlink_port *devlink_port,
+ unsigned int sb_index, u16 pool_index,
+ u32 threshold)
+
+{
+ const struct devlink_ops *ops = devlink_port->devlink->ops;
+
+ if (ops && ops->sb_port_pool_set)
+ return ops->sb_port_pool_set(devlink_port, sb_index,
+ pool_index, threshold);
+ return -EOPNOTSUPP;
+}
+
+static int devlink_nl_cmd_sb_port_pool_set_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink_port *devlink_port = info->user_ptr[0];
+ struct devlink_sb *devlink_sb = info->user_ptr[1];
+ u16 pool_index;
+ u32 threshold;
+ int err;
+
+ err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
+ &pool_index);
+ if (err)
+ return err;
+
+ if (!info->attrs[DEVLINK_ATTR_SB_THRESHOLD])
+ return -EINVAL;
+
+ threshold = nla_get_u32(info->attrs[DEVLINK_ATTR_SB_THRESHOLD]);
+ return devlink_sb_port_pool_set(devlink_port, devlink_sb->index,
+ pool_index, threshold);
+}
+
+static int
+devlink_nl_sb_tc_pool_bind_fill(struct sk_buff *msg, struct devlink *devlink,
+ struct devlink_port *devlink_port,
+ struct devlink_sb *devlink_sb, u16 tc_index,
+ enum devlink_sb_pool_type pool_type,
+ enum devlink_command cmd,
+ u32 portid, u32 seq, int flags)
+{
+ const struct devlink_ops *ops = devlink->ops;
+ u16 pool_index;
+ u32 threshold;
+ void *hdr;
+ int err;
+
+ err = ops->sb_tc_pool_bind_get(devlink_port, devlink_sb->index,
+ tc_index, pool_type,
+ &pool_index, &threshold);
+ if (err)
+ return err;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+ if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index))
+ goto nla_put_failure;
+ if (nla_put_u32(msg, DEVLINK_ATTR_SB_INDEX, devlink_sb->index))
+ goto nla_put_failure;
+ if (nla_put_u16(msg, DEVLINK_ATTR_SB_TC_INDEX, tc_index))
+ goto nla_put_failure;
+ if (nla_put_u8(msg, DEVLINK_ATTR_SB_POOL_TYPE, pool_type))
+ goto nla_put_failure;
+ if (nla_put_u16(msg, DEVLINK_ATTR_SB_POOL_INDEX, pool_index))
+ goto nla_put_failure;
+ if (nla_put_u32(msg, DEVLINK_ATTR_SB_THRESHOLD, threshold))
+ goto nla_put_failure;
+
+ if (ops->sb_occ_tc_port_bind_get) {
+ u32 cur;
+ u32 max;
+
+ err = ops->sb_occ_tc_port_bind_get(devlink_port,
+ devlink_sb->index,
+ tc_index, pool_type,
+ &cur, &max);
+ if (err && err != -EOPNOTSUPP)
+ return err;
+ if (!err) {
+ if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_CUR, cur))
+ goto nla_put_failure;
+ if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_MAX, max))
+ goto nla_put_failure;
+ }
+ }
+
+ genlmsg_end(msg, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+static int devlink_nl_cmd_sb_tc_pool_bind_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink_port *devlink_port = info->user_ptr[0];
+ struct devlink *devlink = devlink_port->devlink;
+ struct devlink_sb *devlink_sb = info->user_ptr[1];
+ struct sk_buff *msg;
+ enum devlink_sb_pool_type pool_type;
+ u16 tc_index;
+ int err;
+
+ err = devlink_sb_pool_type_get_from_info(info, &pool_type);
+ if (err)
+ return err;
+
+ err = devlink_sb_tc_index_get_from_info(devlink_sb, info,
+ pool_type, &tc_index);
+ if (err)
+ return err;
+
+ if (!devlink->ops || !devlink->ops->sb_tc_pool_bind_get)
+ return -EOPNOTSUPP;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_sb_tc_pool_bind_fill(msg, devlink, devlink_port,
+ devlink_sb, tc_index, pool_type,
+ DEVLINK_CMD_SB_TC_POOL_BIND_NEW,
+ info->snd_portid,
+ info->snd_seq, 0);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ return genlmsg_reply(msg, info);
+}
+
+static int __sb_tc_pool_bind_get_dumpit(struct sk_buff *msg,
+ int start, int *p_idx,
+ struct devlink *devlink,
+ struct devlink_sb *devlink_sb,
+ u32 portid, u32 seq)
+{
+ struct devlink_port *devlink_port;
+ u16 tc_index;
+ int err;
+
+ list_for_each_entry(devlink_port, &devlink->port_list, list) {
+ for (tc_index = 0;
+ tc_index < devlink_sb->ingress_tc_count; tc_index++) {
+ if (*p_idx < start) {
+ (*p_idx)++;
+ continue;
+ }
+ err = devlink_nl_sb_tc_pool_bind_fill(msg, devlink,
+ devlink_port,
+ devlink_sb,
+ tc_index,
+ DEVLINK_SB_POOL_TYPE_INGRESS,
+ DEVLINK_CMD_SB_TC_POOL_BIND_NEW,
+ portid, seq,
+ NLM_F_MULTI);
+ if (err)
+ return err;
+ (*p_idx)++;
+ }
+ for (tc_index = 0;
+ tc_index < devlink_sb->egress_tc_count; tc_index++) {
+ if (*p_idx < start) {
+ (*p_idx)++;
+ continue;
+ }
+ err = devlink_nl_sb_tc_pool_bind_fill(msg, devlink,
+ devlink_port,
+ devlink_sb,
+ tc_index,
+ DEVLINK_SB_POOL_TYPE_EGRESS,
+ DEVLINK_CMD_SB_TC_POOL_BIND_NEW,
+ portid, seq,
+ NLM_F_MULTI);
+ if (err)
+ return err;
+ (*p_idx)++;
+ }
+ }
+ return 0;
+}
+
+static int
+devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg,
+ struct netlink_callback *cb)
+{
+ struct devlink *devlink;
+ struct devlink_sb *devlink_sb;
+ int start = cb->args[0];
+ int idx = 0;
+ int err;
+
+ mutex_lock(&devlink_mutex);
+ mutex_lock(&devlink_port_mutex);
+ list_for_each_entry(devlink, &devlink_list, list) {
+ if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) ||
+ !devlink->ops || !devlink->ops->sb_tc_pool_bind_get)
+ continue;
+ list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
+ err = __sb_tc_pool_bind_get_dumpit(msg, start, &idx,
+ devlink,
+ devlink_sb,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq);
+ if (err && err != -EOPNOTSUPP)
+ goto out;
+ }
+ }
+out:
+ mutex_unlock(&devlink_port_mutex);
+ mutex_unlock(&devlink_mutex);
+
+ cb->args[0] = idx;
+ return msg->len;
+}
+
+static int devlink_sb_tc_pool_bind_set(struct devlink_port *devlink_port,
+ unsigned int sb_index, u16 tc_index,
+ enum devlink_sb_pool_type pool_type,
+ u16 pool_index, u32 threshold)
+
+{
+ const struct devlink_ops *ops = devlink_port->devlink->ops;
+
+ if (ops && ops->sb_tc_pool_bind_set)
+ return ops->sb_tc_pool_bind_set(devlink_port, sb_index,
+ tc_index, pool_type,
+ pool_index, threshold);
+ return -EOPNOTSUPP;
+}
+
+static int devlink_nl_cmd_sb_tc_pool_bind_set_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink_port *devlink_port = info->user_ptr[0];
+ struct devlink_sb *devlink_sb = info->user_ptr[1];
+ enum devlink_sb_pool_type pool_type;
+ u16 tc_index;
+ u16 pool_index;
+ u32 threshold;
+ int err;
+
+ err = devlink_sb_pool_type_get_from_info(info, &pool_type);
+ if (err)
+ return err;
+
+ err = devlink_sb_tc_index_get_from_info(devlink_sb, info,
+ pool_type, &tc_index);
+ if (err)
+ return err;
+
+ err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
+ &pool_index);
+ if (err)
+ return err;
+
+ if (!info->attrs[DEVLINK_ATTR_SB_THRESHOLD])
+ return -EINVAL;
+
+ threshold = nla_get_u32(info->attrs[DEVLINK_ATTR_SB_THRESHOLD]);
+ return devlink_sb_tc_pool_bind_set(devlink_port, devlink_sb->index,
+ tc_index, pool_type,
+ pool_index, threshold);
+}
+
+static int devlink_nl_cmd_sb_occ_snapshot_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_sb *devlink_sb = info->user_ptr[1];
+ const struct devlink_ops *ops = devlink->ops;
+
+ if (ops && ops->sb_occ_snapshot)
+ return ops->sb_occ_snapshot(devlink, devlink_sb->index);
+ return -EOPNOTSUPP;
+}
+
+static int devlink_nl_cmd_sb_occ_max_clear_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_sb *devlink_sb = info->user_ptr[1];
+ const struct devlink_ops *ops = devlink->ops;
+
+ if (ops && ops->sb_occ_max_clear)
+ return ops->sb_occ_max_clear(devlink, devlink_sb->index);
+ return -EOPNOTSUPP;
+}
+
static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
[DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32 },
[DEVLINK_ATTR_PORT_TYPE] = { .type = NLA_U16 },
[DEVLINK_ATTR_PORT_SPLIT_COUNT] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_SB_POOL_INDEX] = { .type = NLA_U16 },
+ [DEVLINK_ATTR_SB_POOL_TYPE] = { .type = NLA_U8 },
+ [DEVLINK_ATTR_SB_POOL_SIZE] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE] = { .type = NLA_U8 },
+ [DEVLINK_ATTR_SB_THRESHOLD] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_SB_TC_INDEX] = { .type = NLA_U16 },
};
static const struct genl_ops devlink_nl_ops[] = {
@@ -511,6 +1415,7 @@ static const struct genl_ops devlink_nl_ops[] = {
.doit = devlink_nl_cmd_get_doit,
.dumpit = devlink_nl_cmd_get_dumpit,
.policy = devlink_nl_policy,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
/* can be retrieved by unprivileged users */
},
{
@@ -533,12 +1438,92 @@ static const struct genl_ops devlink_nl_ops[] = {
.doit = devlink_nl_cmd_port_split_doit,
.policy = devlink_nl_policy,
.flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
},
{
.cmd = DEVLINK_CMD_PORT_UNSPLIT,
.doit = devlink_nl_cmd_port_unsplit_doit,
.policy = devlink_nl_policy,
.flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ },
+ {
+ .cmd = DEVLINK_CMD_SB_GET,
+ .doit = devlink_nl_cmd_sb_get_doit,
+ .dumpit = devlink_nl_cmd_sb_get_dumpit,
+ .policy = devlink_nl_policy,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+ DEVLINK_NL_FLAG_NEED_SB,
+ /* can be retrieved by unprivileged users */
+ },
+ {
+ .cmd = DEVLINK_CMD_SB_POOL_GET,
+ .doit = devlink_nl_cmd_sb_pool_get_doit,
+ .dumpit = devlink_nl_cmd_sb_pool_get_dumpit,
+ .policy = devlink_nl_policy,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+ DEVLINK_NL_FLAG_NEED_SB,
+ /* can be retrieved by unprivileged users */
+ },
+ {
+ .cmd = DEVLINK_CMD_SB_POOL_SET,
+ .doit = devlink_nl_cmd_sb_pool_set_doit,
+ .policy = devlink_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+ DEVLINK_NL_FLAG_NEED_SB,
+ },
+ {
+ .cmd = DEVLINK_CMD_SB_PORT_POOL_GET,
+ .doit = devlink_nl_cmd_sb_port_pool_get_doit,
+ .dumpit = devlink_nl_cmd_sb_port_pool_get_dumpit,
+ .policy = devlink_nl_policy,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_PORT |
+ DEVLINK_NL_FLAG_NEED_SB,
+ /* can be retrieved by unprivileged users */
+ },
+ {
+ .cmd = DEVLINK_CMD_SB_PORT_POOL_SET,
+ .doit = devlink_nl_cmd_sb_port_pool_set_doit,
+ .policy = devlink_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_PORT |
+ DEVLINK_NL_FLAG_NEED_SB,
+ },
+ {
+ .cmd = DEVLINK_CMD_SB_TC_POOL_BIND_GET,
+ .doit = devlink_nl_cmd_sb_tc_pool_bind_get_doit,
+ .dumpit = devlink_nl_cmd_sb_tc_pool_bind_get_dumpit,
+ .policy = devlink_nl_policy,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_PORT |
+ DEVLINK_NL_FLAG_NEED_SB,
+ /* can be retrieved by unprivileged users */
+ },
+ {
+ .cmd = DEVLINK_CMD_SB_TC_POOL_BIND_SET,
+ .doit = devlink_nl_cmd_sb_tc_pool_bind_set_doit,
+ .policy = devlink_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_PORT |
+ DEVLINK_NL_FLAG_NEED_SB,
+ },
+ {
+ .cmd = DEVLINK_CMD_SB_OCC_SNAPSHOT,
+ .doit = devlink_nl_cmd_sb_occ_snapshot_doit,
+ .policy = devlink_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+ DEVLINK_NL_FLAG_NEED_SB |
+ DEVLINK_NL_FLAG_LOCK_PORTS,
+ },
+ {
+ .cmd = DEVLINK_CMD_SB_OCC_MAX_CLEAR,
+ .doit = devlink_nl_cmd_sb_occ_max_clear_doit,
+ .policy = devlink_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+ DEVLINK_NL_FLAG_NEED_SB |
+ DEVLINK_NL_FLAG_LOCK_PORTS,
},
};
@@ -561,6 +1546,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
devlink->ops = ops;
devlink_net_set(devlink, &init_net);
INIT_LIST_HEAD(&devlink->port_list);
+ INIT_LIST_HEAD(&devlink->sb_list);
return devlink;
}
EXPORT_SYMBOL_GPL(devlink_alloc);
@@ -630,7 +1616,6 @@ int devlink_port_register(struct devlink *devlink,
}
devlink_port->devlink = devlink;
devlink_port->index = port_index;
- devlink_port->type = DEVLINK_PORT_TYPE_NOTSET;
devlink_port->registered = true;
list_add_tail(&devlink_port->list, &devlink->port_list);
mutex_unlock(&devlink_port_mutex);
@@ -717,6 +1702,51 @@ void devlink_port_split_set(struct devlink_port *devlink_port,
}
EXPORT_SYMBOL_GPL(devlink_port_split_set);
+int devlink_sb_register(struct devlink *devlink, unsigned int sb_index,
+ u32 size, u16 ingress_pools_count,
+ u16 egress_pools_count, u16 ingress_tc_count,
+ u16 egress_tc_count)
+{
+ struct devlink_sb *devlink_sb;
+ int err = 0;
+
+ mutex_lock(&devlink_mutex);
+ if (devlink_sb_index_exists(devlink, sb_index)) {
+ err = -EEXIST;
+ goto unlock;
+ }
+
+ devlink_sb = kzalloc(sizeof(*devlink_sb), GFP_KERNEL);
+ if (!devlink_sb) {
+ err = -ENOMEM;
+ goto unlock;
+ }
+ devlink_sb->index = sb_index;
+ devlink_sb->size = size;
+ devlink_sb->ingress_pools_count = ingress_pools_count;
+ devlink_sb->egress_pools_count = egress_pools_count;
+ devlink_sb->ingress_tc_count = ingress_tc_count;
+ devlink_sb->egress_tc_count = egress_tc_count;
+ list_add_tail(&devlink_sb->list, &devlink->sb_list);
+unlock:
+ mutex_unlock(&devlink_mutex);
+ return err;
+}
+EXPORT_SYMBOL_GPL(devlink_sb_register);
+
+void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index)
+{
+ struct devlink_sb *devlink_sb;
+
+ mutex_lock(&devlink_mutex);
+ devlink_sb = devlink_sb_get_by_index(devlink, sb_index);
+ WARN_ON(!devlink_sb);
+ list_del(&devlink_sb->list);
+ mutex_unlock(&devlink_mutex);
+ kfree(devlink_sb);
+}
+EXPORT_SYMBOL_GPL(devlink_sb_unregister);
+
static int __init devlink_module_init(void)
{
return genl_register_family_with_ops_groups(&devlink_nl_family,
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index f426c5ad6..f4034817d 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -79,12 +79,16 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
[NETIF_F_UFO_BIT] = "tx-udp-fragmentation",
[NETIF_F_GSO_ROBUST_BIT] = "tx-gso-robust",
[NETIF_F_TSO_ECN_BIT] = "tx-tcp-ecn-segmentation",
+ [NETIF_F_TSO_MANGLEID_BIT] = "tx-tcp-mangleid-segmentation",
[NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation",
[NETIF_F_FSO_BIT] = "tx-fcoe-segmentation",
[NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation",
- [NETIF_F_GSO_IPIP_BIT] = "tx-ipip-segmentation",
- [NETIF_F_GSO_SIT_BIT] = "tx-sit-segmentation",
+ [NETIF_F_GSO_GRE_CSUM_BIT] = "tx-gre-csum-segmentation",
+ [NETIF_F_GSO_IPXIP4_BIT] = "tx-ipxip4-segmentation",
+ [NETIF_F_GSO_IPXIP6_BIT] = "tx-ipxip6-segmentation",
[NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation",
+ [NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT] = "tx-udp_tnl-csum-segmentation",
+ [NETIF_F_GSO_PARTIAL_BIT] = "tx-gso-partial",
[NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc",
[NETIF_F_SCTP_CRC_BIT] = "tx-checksum-sctp",
@@ -387,15 +391,17 @@ static int __ethtool_set_flags(struct net_device *dev, u32 data)
return 0;
}
-static void convert_legacy_u32_to_link_mode(unsigned long *dst, u32 legacy_u32)
+void ethtool_convert_legacy_u32_to_link_mode(unsigned long *dst,
+ u32 legacy_u32)
{
bitmap_zero(dst, __ETHTOOL_LINK_MODE_MASK_NBITS);
dst[0] = legacy_u32;
}
+EXPORT_SYMBOL(ethtool_convert_legacy_u32_to_link_mode);
/* return false if src had higher bits set. lower bits always updated. */
-static bool convert_link_mode_to_legacy_u32(u32 *legacy_u32,
- const unsigned long *src)
+bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32,
+ const unsigned long *src)
{
bool retval = true;
@@ -415,6 +421,7 @@ static bool convert_link_mode_to_legacy_u32(u32 *legacy_u32,
*legacy_u32 = src[0];
return retval;
}
+EXPORT_SYMBOL(ethtool_convert_link_mode_to_legacy_u32);
/* return false if legacy contained non-0 deprecated fields
* transceiver/maxtxpkt/maxrxpkt. rest of ksettings always updated
@@ -437,13 +444,13 @@ convert_legacy_settings_to_link_ksettings(
legacy_settings->maxrxpkt)
retval = false;
- convert_legacy_u32_to_link_mode(
+ ethtool_convert_legacy_u32_to_link_mode(
link_ksettings->link_modes.supported,
legacy_settings->supported);
- convert_legacy_u32_to_link_mode(
+ ethtool_convert_legacy_u32_to_link_mode(
link_ksettings->link_modes.advertising,
legacy_settings->advertising);
- convert_legacy_u32_to_link_mode(
+ ethtool_convert_legacy_u32_to_link_mode(
link_ksettings->link_modes.lp_advertising,
legacy_settings->lp_advertising);
link_ksettings->base.speed
@@ -482,13 +489,13 @@ convert_link_ksettings_to_legacy_settings(
* __u32 maxrxpkt;
*/
- retval &= convert_link_mode_to_legacy_u32(
+ retval &= ethtool_convert_link_mode_to_legacy_u32(
&legacy_settings->supported,
link_ksettings->link_modes.supported);
- retval &= convert_link_mode_to_legacy_u32(
+ retval &= ethtool_convert_link_mode_to_legacy_u32(
&legacy_settings->advertising,
link_ksettings->link_modes.advertising);
- retval &= convert_link_mode_to_legacy_u32(
+ retval &= ethtool_convert_link_mode_to_legacy_u32(
&legacy_settings->lp_advertising,
link_ksettings->link_modes.lp_advertising);
ethtool_cmd_speed_set(legacy_settings, link_ksettings->base.speed);
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 365de6643..840acebbb 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -549,7 +549,7 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops,
+ nla_total_size(4) /* FRA_SUPPRESS_IFGROUP */
+ nla_total_size(4) /* FRA_FWMARK */
+ nla_total_size(4) /* FRA_FWMASK */
- + nla_total_size(8); /* FRA_TUN_ID */
+ + nla_total_size_64bit(8); /* FRA_TUN_ID */
if (ops->nlmsg_payload)
payload += ops->nlmsg_payload(rule);
@@ -607,7 +607,7 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
(rule->target &&
nla_put_u32(skb, FRA_GOTO, rule->target)) ||
(rule->tun_id &&
- nla_put_be64(skb, FRA_TUN_ID, rule->tun_id)))
+ nla_put_be64(skb, FRA_TUN_ID, rule->tun_id, FRA_PAD)))
goto nla_put_failure;
if (rule->suppress_ifgroup != -1) {
diff --git a/net/core/filter.c b/net/core/filter.c
index ca7f832b2..e759d90e8 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -53,9 +53,10 @@
#include <net/sock_reuseport.h>
/**
- * sk_filter - run a packet through a socket filter
+ * sk_filter_trim_cap - run a packet through a socket filter
* @sk: sock associated with &sk_buff
* @skb: buffer to filter
+ * @cap: limit on how short the eBPF program may trim the packet
*
* Run the eBPF program and then cut skb->data to correct size returned by
* the program. If pkt_len is 0 we toss packet. If skb->len is smaller
@@ -64,7 +65,7 @@
* be accepted or -EPERM if the packet should be tossed.
*
*/
-int sk_filter(struct sock *sk, struct sk_buff *skb)
+int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
{
int err;
struct sk_filter *filter;
@@ -85,14 +86,13 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
filter = rcu_dereference(sk->sk_filter);
if (filter) {
unsigned int pkt_len = bpf_prog_run_save_cb(filter->prog, skb);
-
- err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
+ err = pkt_len ? pskb_trim(skb, max(cap, pkt_len)) : -EPERM;
}
rcu_read_unlock();
return err;
}
-EXPORT_SYMBOL(sk_filter);
+EXPORT_SYMBOL(sk_filter_trim_cap);
static u64 __skb_get_pay_offset(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
{
@@ -994,7 +994,11 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
*/
goto out_err_free;
- bpf_prog_select_runtime(fp);
+ /* We are guaranteed to never error here with cBPF to eBPF
+ * transitions, since there's no issue with type compatibility
+ * checks on program arrays.
+ */
+ fp = bpf_prog_select_runtime(fp, &err);
kfree(old_prog);
return fp;
@@ -1149,8 +1153,7 @@ void bpf_prog_destroy(struct bpf_prog *fp)
}
EXPORT_SYMBOL_GPL(bpf_prog_destroy);
-static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk,
- bool locked)
+static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk)
{
struct sk_filter *fp, *old_fp;
@@ -1166,8 +1169,10 @@ static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk,
return -ENOMEM;
}
- old_fp = rcu_dereference_protected(sk->sk_filter, locked);
+ old_fp = rcu_dereference_protected(sk->sk_filter,
+ lockdep_sock_is_held(sk));
rcu_assign_pointer(sk->sk_filter, fp);
+
if (old_fp)
sk_filter_uncharge(sk, old_fp);
@@ -1246,8 +1251,7 @@ struct bpf_prog *__get_filter(struct sock_fprog *fprog, struct sock *sk)
* occurs or there is insufficient memory for the filter a negative
* errno code is returned. On success the return is zero.
*/
-int __sk_attach_filter(struct sock_fprog *fprog, struct sock *sk,
- bool locked)
+int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
struct bpf_prog *prog = __get_filter(fprog, sk);
int err;
@@ -1255,7 +1259,7 @@ int __sk_attach_filter(struct sock_fprog *fprog, struct sock *sk,
if (IS_ERR(prog))
return PTR_ERR(prog);
- err = __sk_attach_prog(prog, sk, locked);
+ err = __sk_attach_prog(prog, sk);
if (err < 0) {
__bpf_prog_release(prog);
return err;
@@ -1263,12 +1267,7 @@ int __sk_attach_filter(struct sock_fprog *fprog, struct sock *sk,
return 0;
}
-EXPORT_SYMBOL_GPL(__sk_attach_filter);
-
-int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
-{
- return __sk_attach_filter(fprog, sk, sock_owned_by_user(sk));
-}
+EXPORT_SYMBOL_GPL(sk_attach_filter);
int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
@@ -1314,7 +1313,7 @@ int sk_attach_bpf(u32 ufd, struct sock *sk)
if (IS_ERR(prog))
return PTR_ERR(prog);
- err = __sk_attach_prog(prog, sk, sock_owned_by_user(sk));
+ err = __sk_attach_prog(prog, sk);
if (err < 0) {
bpf_prog_put(prog);
return err;
@@ -1349,6 +1348,21 @@ struct bpf_scratchpad {
static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp);
+static inline int bpf_try_make_writable(struct sk_buff *skb,
+ unsigned int write_len)
+{
+ int err;
+
+ if (!skb_cloned(skb))
+ return 0;
+ if (skb_clone_writable(skb, write_len))
+ return 0;
+ err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+ if (!err)
+ bpf_compute_data_end(skb);
+ return err;
+}
+
static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
{
struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp);
@@ -1371,7 +1385,7 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
*/
if (unlikely((u32) offset > 0xffff || len > sizeof(sp->buff)))
return -EFAULT;
- if (unlikely(skb_try_make_writable(skb, offset + len)))
+ if (unlikely(bpf_try_make_writable(skb, offset + len)))
return -EFAULT;
ptr = skb_header_pointer(skb, offset, len, sp->buff);
@@ -1414,16 +1428,19 @@ static u64 bpf_skb_load_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
unsigned int len = (unsigned int) r4;
void *ptr;
- if (unlikely((u32) offset > 0xffff || len > MAX_BPF_STACK))
- return -EFAULT;
+ if (unlikely((u32) offset > 0xffff))
+ goto err_clear;
ptr = skb_header_pointer(skb, offset, len, to);
if (unlikely(!ptr))
- return -EFAULT;
+ goto err_clear;
if (ptr != to)
memcpy(to, ptr, len);
return 0;
+err_clear:
+ memset(to, 0, len);
+ return -EFAULT;
}
static const struct bpf_func_proto bpf_skb_load_bytes_proto = {
@@ -1432,7 +1449,7 @@ static const struct bpf_func_proto bpf_skb_load_bytes_proto = {
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_ANYTHING,
- .arg3_type = ARG_PTR_TO_STACK,
+ .arg3_type = ARG_PTR_TO_RAW_STACK,
.arg4_type = ARG_CONST_STACK_SIZE,
};
@@ -1446,7 +1463,7 @@ static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
return -EINVAL;
if (unlikely((u32) offset > 0xffff))
return -EFAULT;
- if (unlikely(skb_try_make_writable(skb, offset + sizeof(sum))))
+ if (unlikely(bpf_try_make_writable(skb, offset + sizeof(sum))))
return -EFAULT;
ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
@@ -1501,7 +1518,7 @@ static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
return -EINVAL;
if (unlikely((u32) offset > 0xffff))
return -EFAULT;
- if (unlikely(skb_try_make_writable(skb, offset + sizeof(sum))))
+ if (unlikely(bpf_try_make_writable(skb, offset + sizeof(sum))))
return -EFAULT;
ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
@@ -1701,12 +1718,15 @@ static u64 bpf_skb_vlan_push(u64 r1, u64 r2, u64 vlan_tci, u64 r4, u64 r5)
{
struct sk_buff *skb = (struct sk_buff *) (long) r1;
__be16 vlan_proto = (__force __be16) r2;
+ int ret;
if (unlikely(vlan_proto != htons(ETH_P_8021Q) &&
vlan_proto != htons(ETH_P_8021AD)))
vlan_proto = htons(ETH_P_8021Q);
- return skb_vlan_push(skb, vlan_proto, vlan_tci);
+ ret = skb_vlan_push(skb, vlan_proto, vlan_tci);
+ bpf_compute_data_end(skb);
+ return ret;
}
const struct bpf_func_proto bpf_skb_vlan_push_proto = {
@@ -1722,8 +1742,11 @@ EXPORT_SYMBOL_GPL(bpf_skb_vlan_push_proto);
static u64 bpf_skb_vlan_pop(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
{
struct sk_buff *skb = (struct sk_buff *) (long) r1;
+ int ret;
- return skb_vlan_pop(skb);
+ ret = skb_vlan_pop(skb);
+ bpf_compute_data_end(skb);
+ return ret;
}
const struct bpf_func_proto bpf_skb_vlan_pop_proto = {
@@ -1761,12 +1784,19 @@ static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
struct bpf_tunnel_key *to = (struct bpf_tunnel_key *) (long) r2;
const struct ip_tunnel_info *info = skb_tunnel_info(skb);
u8 compat[sizeof(struct bpf_tunnel_key)];
+ void *to_orig = to;
+ int err;
- if (unlikely(!info || (flags & ~(BPF_F_TUNINFO_IPV6))))
- return -EINVAL;
- if (ip_tunnel_info_af(info) != bpf_tunnel_key_af(flags))
- return -EPROTO;
+ if (unlikely(!info || (flags & ~(BPF_F_TUNINFO_IPV6)))) {
+ err = -EINVAL;
+ goto err_clear;
+ }
+ if (ip_tunnel_info_af(info) != bpf_tunnel_key_af(flags)) {
+ err = -EPROTO;
+ goto err_clear;
+ }
if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
+ err = -EINVAL;
switch (size) {
case offsetof(struct bpf_tunnel_key, tunnel_label):
case offsetof(struct bpf_tunnel_key, tunnel_ext):
@@ -1776,12 +1806,12 @@ static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
* a common path later on.
*/
if (ip_tunnel_info_af(info) != AF_INET)
- return -EINVAL;
+ goto err_clear;
set_compat:
to = (struct bpf_tunnel_key *)compat;
break;
default:
- return -EINVAL;
+ goto err_clear;
}
}
@@ -1798,9 +1828,12 @@ set_compat:
}
if (unlikely(size != sizeof(struct bpf_tunnel_key)))
- memcpy((void *)(long) r2, to, size);
+ memcpy(to_orig, to, size);
return 0;
+err_clear:
+ memset(to_orig, 0, size);
+ return err;
}
static const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = {
@@ -1808,7 +1841,7 @@ static const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = {
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
- .arg2_type = ARG_PTR_TO_STACK,
+ .arg2_type = ARG_PTR_TO_RAW_STACK,
.arg3_type = ARG_CONST_STACK_SIZE,
.arg4_type = ARG_ANYTHING,
};
@@ -1818,16 +1851,26 @@ static u64 bpf_skb_get_tunnel_opt(u64 r1, u64 r2, u64 size, u64 r4, u64 r5)
struct sk_buff *skb = (struct sk_buff *) (long) r1;
u8 *to = (u8 *) (long) r2;
const struct ip_tunnel_info *info = skb_tunnel_info(skb);
+ int err;
if (unlikely(!info ||
- !(info->key.tun_flags & TUNNEL_OPTIONS_PRESENT)))
- return -ENOENT;
- if (unlikely(size < info->options_len))
- return -ENOMEM;
+ !(info->key.tun_flags & TUNNEL_OPTIONS_PRESENT))) {
+ err = -ENOENT;
+ goto err_clear;
+ }
+ if (unlikely(size < info->options_len)) {
+ err = -ENOMEM;
+ goto err_clear;
+ }
ip_tunnel_info_opts_get(to, info);
+ if (size > info->options_len)
+ memset(to + info->options_len, 0, size - info->options_len);
return info->options_len;
+err_clear:
+ memset(to, 0, size);
+ return err;
}
static const struct bpf_func_proto bpf_skb_get_tunnel_opt_proto = {
@@ -1835,7 +1878,7 @@ static const struct bpf_func_proto bpf_skb_get_tunnel_opt_proto = {
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
- .arg2_type = ARG_PTR_TO_STACK,
+ .arg2_type = ARG_PTR_TO_RAW_STACK,
.arg3_type = ARG_CONST_STACK_SIZE,
};
@@ -2021,6 +2064,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
return &bpf_redirect_proto;
case BPF_FUNC_get_route_realm:
return &bpf_get_route_realm_proto;
+ case BPF_FUNC_perf_event_output:
+ return bpf_get_event_output_proto();
default:
return sk_filter_func_proto(func_id);
}
@@ -2028,31 +2073,32 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
static bool __is_valid_access(int off, int size, enum bpf_access_type type)
{
- /* check bounds */
if (off < 0 || off >= sizeof(struct __sk_buff))
return false;
-
- /* disallow misaligned access */
+ /* The verifier guarantees that size > 0. */
if (off % size != 0)
return false;
-
- /* all __sk_buff fields are __u32 */
- if (size != 4)
+ if (size != sizeof(__u32))
return false;
return true;
}
static bool sk_filter_is_valid_access(int off, int size,
- enum bpf_access_type type)
+ enum bpf_access_type type,
+ enum bpf_reg_type *reg_type)
{
- if (off == offsetof(struct __sk_buff, tc_classid))
+ switch (off) {
+ case offsetof(struct __sk_buff, tc_classid):
+ case offsetof(struct __sk_buff, data):
+ case offsetof(struct __sk_buff, data_end):
return false;
+ }
if (type == BPF_WRITE) {
switch (off) {
case offsetof(struct __sk_buff, cb[0]) ...
- offsetof(struct __sk_buff, cb[4]):
+ offsetof(struct __sk_buff, cb[4]):
break;
default:
return false;
@@ -2063,7 +2109,8 @@ static bool sk_filter_is_valid_access(int off, int size,
}
static bool tc_cls_act_is_valid_access(int off, int size,
- enum bpf_access_type type)
+ enum bpf_access_type type,
+ enum bpf_reg_type *reg_type)
{
if (type == BPF_WRITE) {
switch (off) {
@@ -2078,6 +2125,16 @@ static bool tc_cls_act_is_valid_access(int off, int size,
return false;
}
}
+
+ switch (off) {
+ case offsetof(struct __sk_buff, data):
+ *reg_type = PTR_TO_PACKET;
+ break;
+ case offsetof(struct __sk_buff, data_end):
+ *reg_type = PTR_TO_PACKET_END;
+ break;
+ }
+
return __is_valid_access(off, size, type);
}
@@ -2195,6 +2252,20 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, ctx_off);
break;
+ case offsetof(struct __sk_buff, data):
+ *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, data)),
+ dst_reg, src_reg,
+ offsetof(struct sk_buff, data));
+ break;
+
+ case offsetof(struct __sk_buff, data_end):
+ ctx_off -= offsetof(struct __sk_buff, data_end);
+ ctx_off += offsetof(struct sk_buff, cb);
+ ctx_off += offsetof(struct bpf_skb_data_end, data_end);
+ *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(sizeof(void *)),
+ dst_reg, src_reg, ctx_off);
+ break;
+
case offsetof(struct __sk_buff, tc_index):
#ifdef CONFIG_NET_SCHED
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tc_index) != 2);
@@ -2219,30 +2290,30 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
}
static const struct bpf_verifier_ops sk_filter_ops = {
- .get_func_proto = sk_filter_func_proto,
- .is_valid_access = sk_filter_is_valid_access,
- .convert_ctx_access = bpf_net_convert_ctx_access,
+ .get_func_proto = sk_filter_func_proto,
+ .is_valid_access = sk_filter_is_valid_access,
+ .convert_ctx_access = bpf_net_convert_ctx_access,
};
static const struct bpf_verifier_ops tc_cls_act_ops = {
- .get_func_proto = tc_cls_act_func_proto,
- .is_valid_access = tc_cls_act_is_valid_access,
- .convert_ctx_access = bpf_net_convert_ctx_access,
+ .get_func_proto = tc_cls_act_func_proto,
+ .is_valid_access = tc_cls_act_is_valid_access,
+ .convert_ctx_access = bpf_net_convert_ctx_access,
};
static struct bpf_prog_type_list sk_filter_type __read_mostly = {
- .ops = &sk_filter_ops,
- .type = BPF_PROG_TYPE_SOCKET_FILTER,
+ .ops = &sk_filter_ops,
+ .type = BPF_PROG_TYPE_SOCKET_FILTER,
};
static struct bpf_prog_type_list sched_cls_type __read_mostly = {
- .ops = &tc_cls_act_ops,
- .type = BPF_PROG_TYPE_SCHED_CLS,
+ .ops = &tc_cls_act_ops,
+ .type = BPF_PROG_TYPE_SCHED_CLS,
};
static struct bpf_prog_type_list sched_act_type __read_mostly = {
- .ops = &tc_cls_act_ops,
- .type = BPF_PROG_TYPE_SCHED_ACT,
+ .ops = &tc_cls_act_ops,
+ .type = BPF_PROG_TYPE_SCHED_ACT,
};
static int __init register_sk_filter_ops(void)
@@ -2255,7 +2326,7 @@ static int __init register_sk_filter_ops(void)
}
late_initcall(register_sk_filter_ops);
-int __sk_detach_filter(struct sock *sk, bool locked)
+int sk_detach_filter(struct sock *sk)
{
int ret = -ENOENT;
struct sk_filter *filter;
@@ -2263,7 +2334,8 @@ int __sk_detach_filter(struct sock *sk, bool locked)
if (sock_flag(sk, SOCK_FILTER_LOCKED))
return -EPERM;
- filter = rcu_dereference_protected(sk->sk_filter, locked);
+ filter = rcu_dereference_protected(sk->sk_filter,
+ lockdep_sock_is_held(sk));
if (filter) {
RCU_INIT_POINTER(sk->sk_filter, NULL);
sk_filter_uncharge(sk, filter);
@@ -2272,12 +2344,7 @@ int __sk_detach_filter(struct sock *sk, bool locked)
return ret;
}
-EXPORT_SYMBOL_GPL(__sk_detach_filter);
-
-int sk_detach_filter(struct sock *sk)
-{
- return __sk_detach_filter(sk, sock_owned_by_user(sk));
-}
+EXPORT_SYMBOL_GPL(sk_detach_filter);
int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
unsigned int len)
@@ -2288,7 +2355,7 @@ int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
lock_sock(sk);
filter = rcu_dereference_protected(sk->sk_filter,
- sock_owned_by_user(sk));
+ lockdep_sock_is_held(sk));
if (!filter)
goto out;
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index e640462ea..be873e4e3 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -25,9 +25,9 @@
static inline int
-gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size)
+gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size, int padattr)
{
- if (nla_put(d->skb, type, size, buf))
+ if (nla_put_64bit(d->skb, type, size, buf, padattr))
goto nla_put_failure;
return 0;
@@ -47,6 +47,7 @@ nla_put_failure:
* @xstats_type: TLV type for backward compatibility xstats TLV
* @lock: statistics lock
* @d: dumping handle
+ * @padattr: padding attribute
*
* Initializes the dumping handle, grabs the statistic lock and appends
* an empty TLV header to the socket buffer for use a container for all
@@ -59,7 +60,8 @@ nla_put_failure:
*/
int
gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type,
- int xstats_type, spinlock_t *lock, struct gnet_dump *d)
+ int xstats_type, spinlock_t *lock,
+ struct gnet_dump *d, int padattr)
__acquires(lock)
{
memset(d, 0, sizeof(*d));
@@ -71,20 +73,22 @@ gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type,
d->skb = skb;
d->compat_tc_stats = tc_stats_type;
d->compat_xstats = xstats_type;
+ d->padattr = padattr;
if (d->tail)
- return gnet_stats_copy(d, type, NULL, 0);
+ return gnet_stats_copy(d, type, NULL, 0, padattr);
return 0;
}
EXPORT_SYMBOL(gnet_stats_start_copy_compat);
/**
- * gnet_stats_start_copy_compat - start dumping procedure in compatibility mode
+ * gnet_stats_start_copy - start dumping procedure in compatibility mode
* @skb: socket buffer to put statistics TLVs into
* @type: TLV type for top level statistic TLV
* @lock: statistics lock
* @d: dumping handle
+ * @padattr: padding attribute
*
* Initializes the dumping handle, grabs the statistic lock and appends
* an empty TLV header to the socket buffer for use a container for all
@@ -94,9 +98,9 @@ EXPORT_SYMBOL(gnet_stats_start_copy_compat);
*/
int
gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock,
- struct gnet_dump *d)
+ struct gnet_dump *d, int padattr)
{
- return gnet_stats_start_copy_compat(skb, type, 0, 0, lock, d);
+ return gnet_stats_start_copy_compat(skb, type, 0, 0, lock, d, padattr);
}
EXPORT_SYMBOL(gnet_stats_start_copy);
@@ -169,7 +173,8 @@ gnet_stats_copy_basic(struct gnet_dump *d,
memset(&sb, 0, sizeof(sb));
sb.bytes = bstats.bytes;
sb.packets = bstats.packets;
- return gnet_stats_copy(d, TCA_STATS_BASIC, &sb, sizeof(sb));
+ return gnet_stats_copy(d, TCA_STATS_BASIC, &sb, sizeof(sb),
+ TCA_STATS_PAD);
}
return 0;
}
@@ -208,11 +213,13 @@ gnet_stats_copy_rate_est(struct gnet_dump *d,
}
if (d->tail) {
- res = gnet_stats_copy(d, TCA_STATS_RATE_EST, &est, sizeof(est));
+ res = gnet_stats_copy(d, TCA_STATS_RATE_EST, &est, sizeof(est),
+ TCA_STATS_PAD);
if (res < 0 || est.bps == r->bps)
return res;
/* emit 64bit stats only if needed */
- return gnet_stats_copy(d, TCA_STATS_RATE_EST64, r, sizeof(*r));
+ return gnet_stats_copy(d, TCA_STATS_RATE_EST64, r, sizeof(*r),
+ TCA_STATS_PAD);
}
return 0;
@@ -286,7 +293,8 @@ gnet_stats_copy_queue(struct gnet_dump *d,
if (d->tail)
return gnet_stats_copy(d, TCA_STATS_QUEUE,
- &qstats, sizeof(qstats));
+ &qstats, sizeof(qstats),
+ TCA_STATS_PAD);
return 0;
}
@@ -316,7 +324,8 @@ gnet_stats_copy_app(struct gnet_dump *d, void *st, int len)
}
if (d->tail)
- return gnet_stats_copy(d, TCA_STATS_APP, st, len);
+ return gnet_stats_copy(d, TCA_STATS_APP, st, len,
+ TCA_STATS_PAD);
return 0;
@@ -347,12 +356,12 @@ gnet_stats_finish_copy(struct gnet_dump *d)
if (d->compat_tc_stats)
if (gnet_stats_copy(d, d->compat_tc_stats, &d->tc_stats,
- sizeof(d->tc_stats)) < 0)
+ sizeof(d->tc_stats), d->padattr) < 0)
return -1;
if (d->compat_xstats && d->xstats) {
if (gnet_stats_copy(d, d->compat_xstats, d->xstats,
- d->xstats_len) < 0)
+ d->xstats_len, d->padattr) < 0)
return -1;
}
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 769cece9b..510cd62fc 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1763,21 +1763,22 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
NEIGH_VAR(parms, MCAST_PROBES)) ||
nla_put_u32(skb, NDTPA_MCAST_REPROBES,
NEIGH_VAR(parms, MCAST_REPROBES)) ||
- nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) ||
+ nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time,
+ NDTPA_PAD) ||
nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
- NEIGH_VAR(parms, BASE_REACHABLE_TIME)) ||
+ NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) ||
nla_put_msecs(skb, NDTPA_GC_STALETIME,
- NEIGH_VAR(parms, GC_STALETIME)) ||
+ NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) ||
nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
- NEIGH_VAR(parms, DELAY_PROBE_TIME)) ||
+ NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) ||
nla_put_msecs(skb, NDTPA_RETRANS_TIME,
- NEIGH_VAR(parms, RETRANS_TIME)) ||
+ NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) ||
nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
- NEIGH_VAR(parms, ANYCAST_DELAY)) ||
+ NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) ||
nla_put_msecs(skb, NDTPA_PROXY_DELAY,
- NEIGH_VAR(parms, PROXY_DELAY)) ||
+ NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
nla_put_msecs(skb, NDTPA_LOCKTIME,
- NEIGH_VAR(parms, LOCKTIME)))
+ NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD))
goto nla_put_failure;
return nla_nest_end(skb, nest);
@@ -1804,7 +1805,7 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
ndtmsg->ndtm_pad2 = 0;
if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
- nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval) ||
+ nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval, NDTA_PAD) ||
nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
@@ -1856,7 +1857,8 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
ndst.ndts_table_fulls += st->table_fulls;
}
- if (nla_put(skb, NDTA_STATS, sizeof(ndst), &ndst))
+ if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst,
+ NDTA_PAD))
goto nla_put_failure;
}
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index 2bf832996..14d09345f 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -162,7 +162,8 @@ static int softnet_seq_show(struct seq_file *seq, void *v)
"%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
sd->processed, sd->dropped, sd->time_squeeze, 0,
0, 0, 0, 0, /* was fastroute */
- sd->cpu_collision, sd->received_rps, flow_limit_count);
+ 0, /* was cpu_collision */
+ sd->received_rps, flow_limit_count);
return 0;
}
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 2b3f76fe6..7a0b61655 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -24,6 +24,7 @@
#include <linux/jiffies.h>
#include <linux/pm_runtime.h>
#include <linux/of.h>
+#include <linux/of_net.h>
#include "net-sysfs.h"
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 20999aa59..8b02df0d3 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2245,10 +2245,8 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
hrtimer_set_expires(&t.timer, spin_until);
remaining = ktime_to_ns(hrtimer_expires_remaining(&t.timer));
- if (remaining <= 0) {
- pkt_dev->next_tx = ktime_add_ns(spin_until, pkt_dev->delay);
- return;
- }
+ if (remaining <= 0)
+ goto out;
start_time = ktime_get();
if (remaining < 100000) {
@@ -2273,7 +2271,9 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
}
pkt_dev->idle_acc += ktime_to_ns(ktime_sub(end_time, start_time));
+out:
pkt_dev->next_tx = ktime_add_ns(spin_until, pkt_dev->delay);
+ destroy_hrtimer_on_stack(&t.timer);
}
static inline void set_pkt_overhead(struct pktgen_dev *pkt_dev)
@@ -3472,7 +3472,6 @@ xmit_more:
pkt_dev->odevname, ret);
pkt_dev->errors++;
/* fallthru */
- case NETDEV_TX_LOCKED:
case NETDEV_TX_BUSY:
/* Retry it next time */
atomic_dec(&(pkt_dev->skb->users));
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 65763c29f..d69c4644f 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -808,11 +808,6 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
a->rx_nohandler = b->rx_nohandler;
}
-static void copy_rtnl_link_stats64(void *v, const struct rtnl_link_stats64 *b)
-{
- memcpy(v, b, sizeof(*b));
-}
-
/* All VF info */
static inline int rtnl_vfinfo_size(const struct net_device *dev,
u32 ext_filter_mask)
@@ -830,17 +825,17 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev,
nla_total_size(sizeof(struct ifla_vf_link_state)) +
nla_total_size(sizeof(struct ifla_vf_rss_query_en)) +
/* IFLA_VF_STATS_RX_PACKETS */
- nla_total_size(sizeof(__u64)) +
+ nla_total_size_64bit(sizeof(__u64)) +
/* IFLA_VF_STATS_TX_PACKETS */
- nla_total_size(sizeof(__u64)) +
+ nla_total_size_64bit(sizeof(__u64)) +
/* IFLA_VF_STATS_RX_BYTES */
- nla_total_size(sizeof(__u64)) +
+ nla_total_size_64bit(sizeof(__u64)) +
/* IFLA_VF_STATS_TX_BYTES */
- nla_total_size(sizeof(__u64)) +
+ nla_total_size_64bit(sizeof(__u64)) +
/* IFLA_VF_STATS_BROADCAST */
- nla_total_size(sizeof(__u64)) +
+ nla_total_size_64bit(sizeof(__u64)) +
/* IFLA_VF_STATS_MULTICAST */
- nla_total_size(sizeof(__u64)) +
+ nla_total_size_64bit(sizeof(__u64)) +
nla_total_size(sizeof(struct ifla_vf_trust)));
return size;
} else
@@ -881,9 +876,9 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
+ nla_total_size(IFALIASZ) /* IFLA_IFALIAS */
+ nla_total_size(IFNAMSIZ) /* IFLA_QDISC */
- + nla_total_size(sizeof(struct rtnl_link_ifmap))
+ + nla_total_size_64bit(sizeof(struct rtnl_link_ifmap))
+ nla_total_size(sizeof(struct rtnl_link_stats))
- + nla_total_size(sizeof(struct rtnl_link_stats64))
+ + nla_total_size_64bit(sizeof(struct rtnl_link_stats64))
+ nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
+ nla_total_size(MAX_ADDR_LEN) /* IFLA_BROADCAST */
+ nla_total_size(4) /* IFLA_TXQLEN */
@@ -1054,25 +1049,23 @@ static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev)
static noinline_for_stack int rtnl_fill_stats(struct sk_buff *skb,
struct net_device *dev)
{
- const struct rtnl_link_stats64 *stats;
- struct rtnl_link_stats64 temp;
+ struct rtnl_link_stats64 *sp;
struct nlattr *attr;
- stats = dev_get_stats(dev, &temp);
-
- attr = nla_reserve(skb, IFLA_STATS,
- sizeof(struct rtnl_link_stats));
+ attr = nla_reserve_64bit(skb, IFLA_STATS64,
+ sizeof(struct rtnl_link_stats64), IFLA_PAD);
if (!attr)
return -EMSGSIZE;
- copy_rtnl_link_stats(nla_data(attr), stats);
+ sp = nla_data(attr);
+ dev_get_stats(dev, sp);
- attr = nla_reserve(skb, IFLA_STATS64,
- sizeof(struct rtnl_link_stats64));
+ attr = nla_reserve(skb, IFLA_STATS,
+ sizeof(struct rtnl_link_stats));
if (!attr)
return -EMSGSIZE;
- copy_rtnl_link_stats64(nla_data(attr), stats);
+ copy_rtnl_link_stats(nla_data(attr), sp);
return 0;
}
@@ -1160,18 +1153,18 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
nla_nest_cancel(skb, vfinfo);
return -EMSGSIZE;
}
- if (nla_put_u64(skb, IFLA_VF_STATS_RX_PACKETS,
- vf_stats.rx_packets) ||
- nla_put_u64(skb, IFLA_VF_STATS_TX_PACKETS,
- vf_stats.tx_packets) ||
- nla_put_u64(skb, IFLA_VF_STATS_RX_BYTES,
- vf_stats.rx_bytes) ||
- nla_put_u64(skb, IFLA_VF_STATS_TX_BYTES,
- vf_stats.tx_bytes) ||
- nla_put_u64(skb, IFLA_VF_STATS_BROADCAST,
- vf_stats.broadcast) ||
- nla_put_u64(skb, IFLA_VF_STATS_MULTICAST,
- vf_stats.multicast))
+ if (nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_PACKETS,
+ vf_stats.rx_packets, IFLA_VF_STATS_PAD) ||
+ nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_PACKETS,
+ vf_stats.tx_packets, IFLA_VF_STATS_PAD) ||
+ nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_BYTES,
+ vf_stats.rx_bytes, IFLA_VF_STATS_PAD) ||
+ nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_BYTES,
+ vf_stats.tx_bytes, IFLA_VF_STATS_PAD) ||
+ nla_put_u64_64bit(skb, IFLA_VF_STATS_BROADCAST,
+ vf_stats.broadcast, IFLA_VF_STATS_PAD) ||
+ nla_put_u64_64bit(skb, IFLA_VF_STATS_MULTICAST,
+ vf_stats.multicast, IFLA_VF_STATS_PAD))
return -EMSGSIZE;
nla_nest_end(skb, vfstats);
nla_nest_end(skb, vf);
@@ -1190,7 +1183,7 @@ static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
map.dma = dev->dma;
map.port = dev->if_port;
- if (nla_put(skb, IFLA_MAP, sizeof(map), &map))
+ if (nla_put_64bit(skb, IFLA_MAP, sizeof(map), &map, IFLA_PAD))
return -EMSGSIZE;
return 0;
@@ -3453,6 +3446,202 @@ out:
return err;
}
+static bool stats_attr_valid(unsigned int mask, int attrid, int idxattr)
+{
+ return (mask & IFLA_STATS_FILTER_BIT(attrid)) &&
+ (!idxattr || idxattr == attrid);
+}
+
+static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
+ int type, u32 pid, u32 seq, u32 change,
+ unsigned int flags, unsigned int filter_mask,
+ int *idxattr, int *prividx)
+{
+ struct if_stats_msg *ifsm;
+ struct nlmsghdr *nlh;
+ struct nlattr *attr;
+ int s_prividx = *prividx;
+
+ ASSERT_RTNL();
+
+ nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifsm), flags);
+ if (!nlh)
+ return -EMSGSIZE;
+
+ ifsm = nlmsg_data(nlh);
+ ifsm->ifindex = dev->ifindex;
+ ifsm->filter_mask = filter_mask;
+
+ if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_64, *idxattr)) {
+ struct rtnl_link_stats64 *sp;
+
+ attr = nla_reserve_64bit(skb, IFLA_STATS_LINK_64,
+ sizeof(struct rtnl_link_stats64),
+ IFLA_STATS_UNSPEC);
+ if (!attr)
+ goto nla_put_failure;
+
+ sp = nla_data(attr);
+ dev_get_stats(dev, sp);
+ }
+
+ if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_XSTATS, *idxattr)) {
+ const struct rtnl_link_ops *ops = dev->rtnl_link_ops;
+
+ if (ops && ops->fill_linkxstats) {
+ int err;
+
+ *idxattr = IFLA_STATS_LINK_XSTATS;
+ attr = nla_nest_start(skb,
+ IFLA_STATS_LINK_XSTATS);
+ if (!attr)
+ goto nla_put_failure;
+
+ err = ops->fill_linkxstats(skb, dev, prividx);
+ nla_nest_end(skb, attr);
+ if (err)
+ goto nla_put_failure;
+ *idxattr = 0;
+ }
+ }
+
+ nlmsg_end(skb, nlh);
+
+ return 0;
+
+nla_put_failure:
+ /* not a multi message or no progress mean a real error */
+ if (!(flags & NLM_F_MULTI) || s_prividx == *prividx)
+ nlmsg_cancel(skb, nlh);
+ else
+ nlmsg_end(skb, nlh);
+
+ return -EMSGSIZE;
+}
+
+static const struct nla_policy ifla_stats_policy[IFLA_STATS_MAX + 1] = {
+ [IFLA_STATS_LINK_64] = { .len = sizeof(struct rtnl_link_stats64) },
+};
+
+static size_t if_nlmsg_stats_size(const struct net_device *dev,
+ u32 filter_mask)
+{
+ size_t size = 0;
+
+ if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_64, 0))
+ size += nla_total_size_64bit(sizeof(struct rtnl_link_stats64));
+
+ if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_XSTATS, 0)) {
+ const struct rtnl_link_ops *ops = dev->rtnl_link_ops;
+
+ if (ops && ops->get_linkxstats_size) {
+ size += nla_total_size(ops->get_linkxstats_size(dev));
+ /* for IFLA_STATS_LINK_XSTATS */
+ size += nla_total_size(0);
+ }
+ }
+
+ return size;
+}
+
+static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+ struct net *net = sock_net(skb->sk);
+ struct net_device *dev = NULL;
+ int idxattr = 0, prividx = 0;
+ struct if_stats_msg *ifsm;
+ struct sk_buff *nskb;
+ u32 filter_mask;
+ int err;
+
+ ifsm = nlmsg_data(nlh);
+ if (ifsm->ifindex > 0)
+ dev = __dev_get_by_index(net, ifsm->ifindex);
+ else
+ return -EINVAL;
+
+ if (!dev)
+ return -ENODEV;
+
+ filter_mask = ifsm->filter_mask;
+ if (!filter_mask)
+ return -EINVAL;
+
+ nskb = nlmsg_new(if_nlmsg_stats_size(dev, filter_mask), GFP_KERNEL);
+ if (!nskb)
+ return -ENOBUFS;
+
+ err = rtnl_fill_statsinfo(nskb, dev, RTM_NEWSTATS,
+ NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
+ 0, filter_mask, &idxattr, &prividx);
+ if (err < 0) {
+ /* -EMSGSIZE implies BUG in if_nlmsg_stats_size */
+ WARN_ON(err == -EMSGSIZE);
+ kfree_skb(nskb);
+ } else {
+ err = rtnl_unicast(nskb, net, NETLINK_CB(skb).portid);
+ }
+
+ return err;
+}
+
+static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ int h, s_h, err, s_idx, s_idxattr, s_prividx;
+ struct net *net = sock_net(skb->sk);
+ unsigned int flags = NLM_F_MULTI;
+ struct if_stats_msg *ifsm;
+ struct hlist_head *head;
+ struct net_device *dev;
+ u32 filter_mask = 0;
+ int idx = 0;
+
+ s_h = cb->args[0];
+ s_idx = cb->args[1];
+ s_idxattr = cb->args[2];
+ s_prividx = cb->args[3];
+
+ cb->seq = net->dev_base_seq;
+
+ ifsm = nlmsg_data(cb->nlh);
+ filter_mask = ifsm->filter_mask;
+ if (!filter_mask)
+ return -EINVAL;
+
+ for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
+ idx = 0;
+ head = &net->dev_index_head[h];
+ hlist_for_each_entry(dev, head, index_hlist) {
+ if (idx < s_idx)
+ goto cont;
+ err = rtnl_fill_statsinfo(skb, dev, RTM_NEWSTATS,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, 0,
+ flags, filter_mask,
+ &s_idxattr, &s_prividx);
+ /* If we ran out of room on the first message,
+ * we're in trouble
+ */
+ WARN_ON((err == -EMSGSIZE) && (skb->len == 0));
+
+ if (err < 0)
+ goto out;
+ s_prividx = 0;
+ s_idxattr = 0;
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+cont:
+ idx++;
+ }
+ }
+out:
+ cb->args[3] = s_prividx;
+ cb->args[2] = s_idxattr;
+ cb->args[1] = idx;
+ cb->args[0] = h;
+
+ return skb->len;
+}
+
/* Process one rtnetlink message. */
static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
@@ -3602,4 +3791,7 @@ void __init rtnetlink_init(void)
rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, NULL);
rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL, NULL);
rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, NULL);
+
+ rtnl_register(PF_UNSPEC, RTM_GETSTATS, rtnl_stats_get, rtnl_stats_dump,
+ NULL);
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 59bf4d771..eb12d2161 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3058,11 +3058,11 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
struct sk_buff *frag_skb = head_skb;
unsigned int offset = doffset;
unsigned int tnl_hlen = skb_tnl_header_len(head_skb);
+ unsigned int partial_segs = 0;
unsigned int headroom;
- unsigned int len;
+ unsigned int len = head_skb->len;
__be16 proto;
- bool csum;
- int sg = !!(features & NETIF_F_SG);
+ bool csum, sg;
int nfrags = skb_shinfo(head_skb)->nr_frags;
int err = -ENOMEM;
int i = 0;
@@ -3074,8 +3074,21 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
if (unlikely(!proto))
return ERR_PTR(-EINVAL);
+ sg = !!(features & NETIF_F_SG);
csum = !!can_checksum_protocol(features, proto);
+ /* GSO partial only requires that we trim off any excess that
+ * doesn't fit into an MSS sized block, so take care of that
+ * now.
+ */
+ if (sg && csum && (features & NETIF_F_GSO_PARTIAL)) {
+ partial_segs = len / mss;
+ if (partial_segs > 1)
+ mss *= partial_segs;
+ else
+ partial_segs = 0;
+ }
+
headroom = skb_headroom(head_skb);
pos = skb_headlen(head_skb);
@@ -3263,6 +3276,23 @@ perform_csum_check:
*/
segs->prev = tail;
+ /* Update GSO info on first skb in partial sequence. */
+ if (partial_segs) {
+ int type = skb_shinfo(head_skb)->gso_type;
+
+ /* Update type to add partial and then remove dodgy if set */
+ type |= SKB_GSO_PARTIAL;
+ type &= ~SKB_GSO_DODGY;
+
+ /* Update GSO info and prepare to start updating headers on
+ * our way back down the stack of protocols.
+ */
+ skb_shinfo(segs)->gso_size = skb_shinfo(head_skb)->gso_size;
+ skb_shinfo(segs)->gso_segs = partial_segs;
+ skb_shinfo(segs)->gso_type = type;
+ SKB_GSO_CB(segs)->data_offset = skb_headroom(segs) + doffset;
+ }
+
/* Following permits correct backpressure, for protocols
* using skb_set_owner_w().
* Idea is to tranfert ownership from head_skb to last segment.
@@ -4577,3 +4607,239 @@ failure:
return NULL;
}
EXPORT_SYMBOL(alloc_skb_with_frags);
+
+/* carve out the first off bytes from skb when off < headlen */
+static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off,
+ const int headlen, gfp_t gfp_mask)
+{
+ int i;
+ int size = skb_end_offset(skb);
+ int new_hlen = headlen - off;
+ u8 *data;
+
+ size = SKB_DATA_ALIGN(size);
+
+ if (skb_pfmemalloc(skb))
+ gfp_mask |= __GFP_MEMALLOC;
+ data = kmalloc_reserve(size +
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
+ gfp_mask, NUMA_NO_NODE, NULL);
+ if (!data)
+ return -ENOMEM;
+
+ size = SKB_WITH_OVERHEAD(ksize(data));
+
+ /* Copy real data, and all frags */
+ skb_copy_from_linear_data_offset(skb, off, data, new_hlen);
+ skb->len -= off;
+
+ memcpy((struct skb_shared_info *)(data + size),
+ skb_shinfo(skb),
+ offsetof(struct skb_shared_info,
+ frags[skb_shinfo(skb)->nr_frags]));
+ if (skb_cloned(skb)) {
+ /* drop the old head gracefully */
+ if (skb_orphan_frags(skb, gfp_mask)) {
+ kfree(data);
+ return -ENOMEM;
+ }
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+ skb_frag_ref(skb, i);
+ if (skb_has_frag_list(skb))
+ skb_clone_fraglist(skb);
+ skb_release_data(skb);
+ } else {
+ /* we can reuse existing recount- all we did was
+ * relocate values
+ */
+ skb_free_head(skb);
+ }
+
+ skb->head = data;
+ skb->data = data;
+ skb->head_frag = 0;
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+ skb->end = size;
+#else
+ skb->end = skb->head + size;
+#endif
+ skb_set_tail_pointer(skb, skb_headlen(skb));
+ skb_headers_offset_update(skb, 0);
+ skb->cloned = 0;
+ skb->hdr_len = 0;
+ skb->nohdr = 0;
+ atomic_set(&skb_shinfo(skb)->dataref, 1);
+
+ return 0;
+}
+
+static int pskb_carve(struct sk_buff *skb, const u32 off, gfp_t gfp);
+
+/* carve out the first eat bytes from skb's frag_list. May recurse into
+ * pskb_carve()
+ */
+static int pskb_carve_frag_list(struct sk_buff *skb,
+ struct skb_shared_info *shinfo, int eat,
+ gfp_t gfp_mask)
+{
+ struct sk_buff *list = shinfo->frag_list;
+ struct sk_buff *clone = NULL;
+ struct sk_buff *insp = NULL;
+
+ do {
+ if (!list) {
+ pr_err("Not enough bytes to eat. Want %d\n", eat);
+ return -EFAULT;
+ }
+ if (list->len <= eat) {
+ /* Eaten as whole. */
+ eat -= list->len;
+ list = list->next;
+ insp = list;
+ } else {
+ /* Eaten partially. */
+ if (skb_shared(list)) {
+ clone = skb_clone(list, gfp_mask);
+ if (!clone)
+ return -ENOMEM;
+ insp = list->next;
+ list = clone;
+ } else {
+ /* This may be pulled without problems. */
+ insp = list;
+ }
+ if (pskb_carve(list, eat, gfp_mask) < 0) {
+ kfree_skb(clone);
+ return -ENOMEM;
+ }
+ break;
+ }
+ } while (eat);
+
+ /* Free pulled out fragments. */
+ while ((list = shinfo->frag_list) != insp) {
+ shinfo->frag_list = list->next;
+ kfree_skb(list);
+ }
+ /* And insert new clone at head. */
+ if (clone) {
+ clone->next = list;
+ shinfo->frag_list = clone;
+ }
+ return 0;
+}
+
+/* carve off first len bytes from skb. Split line (off) is in the
+ * non-linear part of skb
+ */
+static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
+ int pos, gfp_t gfp_mask)
+{
+ int i, k = 0;
+ int size = skb_end_offset(skb);
+ u8 *data;
+ const int nfrags = skb_shinfo(skb)->nr_frags;
+ struct skb_shared_info *shinfo;
+
+ size = SKB_DATA_ALIGN(size);
+
+ if (skb_pfmemalloc(skb))
+ gfp_mask |= __GFP_MEMALLOC;
+ data = kmalloc_reserve(size +
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
+ gfp_mask, NUMA_NO_NODE, NULL);
+ if (!data)
+ return -ENOMEM;
+
+ size = SKB_WITH_OVERHEAD(ksize(data));
+
+ memcpy((struct skb_shared_info *)(data + size),
+ skb_shinfo(skb), offsetof(struct skb_shared_info,
+ frags[skb_shinfo(skb)->nr_frags]));
+ if (skb_orphan_frags(skb, gfp_mask)) {
+ kfree(data);
+ return -ENOMEM;
+ }
+ shinfo = (struct skb_shared_info *)(data + size);
+ for (i = 0; i < nfrags; i++) {
+ int fsize = skb_frag_size(&skb_shinfo(skb)->frags[i]);
+
+ if (pos + fsize > off) {
+ shinfo->frags[k] = skb_shinfo(skb)->frags[i];
+
+ if (pos < off) {
+ /* Split frag.
+ * We have two variants in this case:
+ * 1. Move all the frag to the second
+ * part, if it is possible. F.e.
+ * this approach is mandatory for TUX,
+ * where splitting is expensive.
+ * 2. Split is accurately. We make this.
+ */
+ shinfo->frags[0].page_offset += off - pos;
+ skb_frag_size_sub(&shinfo->frags[0], off - pos);
+ }
+ skb_frag_ref(skb, i);
+ k++;
+ }
+ pos += fsize;
+ }
+ shinfo->nr_frags = k;
+ if (skb_has_frag_list(skb))
+ skb_clone_fraglist(skb);
+
+ if (k == 0) {
+ /* split line is in frag list */
+ pskb_carve_frag_list(skb, shinfo, off - pos, gfp_mask);
+ }
+ skb_release_data(skb);
+
+ skb->head = data;
+ skb->head_frag = 0;
+ skb->data = data;
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+ skb->end = size;
+#else
+ skb->end = skb->head + size;
+#endif
+ skb_reset_tail_pointer(skb);
+ skb_headers_offset_update(skb, 0);
+ skb->cloned = 0;
+ skb->hdr_len = 0;
+ skb->nohdr = 0;
+ skb->len -= off;
+ skb->data_len = skb->len;
+ atomic_set(&skb_shinfo(skb)->dataref, 1);
+ return 0;
+}
+
+/* remove len bytes from the beginning of the skb */
+static int pskb_carve(struct sk_buff *skb, const u32 len, gfp_t gfp)
+{
+ int headlen = skb_headlen(skb);
+
+ if (len < headlen)
+ return pskb_carve_inside_header(skb, len, headlen, gfp);
+ else
+ return pskb_carve_inside_nonlinear(skb, len, headlen, gfp);
+}
+
+/* Extract to_copy bytes starting at off from skb, and return this in
+ * a new skb
+ */
+struct sk_buff *pskb_extract(struct sk_buff *skb, int off,
+ int to_copy, gfp_t gfp)
+{
+ struct sk_buff *clone = skb_clone(skb, gfp);
+
+ if (!clone)
+ return NULL;
+
+ if (pskb_carve(clone, off, gfp) < 0 ||
+ pskb_trim(clone, to_copy)) {
+ kfree_skb(clone);
+ return NULL;
+ }
+ return clone;
+}
+EXPORT_SYMBOL(pskb_extract);
diff --git a/net/core/sock.c b/net/core/sock.c
index 7e73c26b6..25dab8b60 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -405,9 +405,8 @@ static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
}
-int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
- int err;
unsigned long flags;
struct sk_buff_head *list = &sk->sk_receive_queue;
@@ -417,10 +416,6 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
return -ENOMEM;
}
- err = sk_filter(sk, skb);
- if (err)
- return err;
-
if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
atomic_inc(&sk->sk_drops);
return -ENOBUFS;
@@ -443,13 +438,26 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
sk->sk_data_ready(sk);
return 0;
}
+EXPORT_SYMBOL(__sock_queue_rcv_skb);
+
+int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+{
+ int err;
+
+ err = sk_filter(sk, skb);
+ if (err)
+ return err;
+
+ return __sock_queue_rcv_skb(sk, skb);
+}
EXPORT_SYMBOL(sock_queue_rcv_skb);
-int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
+int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
+ const int nested, unsigned int trim_cap)
{
int rc = NET_RX_SUCCESS;
- if (sk_filter(sk, skb))
+ if (sk_filter_trim_cap(sk, skb, trim_cap))
goto discard_and_relse;
skb->dev = NULL;
@@ -485,7 +493,7 @@ discard_and_relse:
kfree_skb(skb);
goto out;
}
-EXPORT_SYMBOL(sk_receive_skb);
+EXPORT_SYMBOL(__sk_receive_skb);
struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
{
@@ -835,7 +843,8 @@ set_rcvbuf:
!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
if (sk->sk_protocol == IPPROTO_TCP &&
sk->sk_type == SOCK_STREAM) {
- if (sk->sk_state != TCP_ESTABLISHED) {
+ if ((1 << sk->sk_state) &
+ (TCPF_CLOSE | TCPF_LISTEN)) {
ret = -EINVAL;
break;
}
@@ -1421,8 +1430,12 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
}
EXPORT_SYMBOL(sk_alloc);
-void sk_destruct(struct sock *sk)
+/* Sockets having SOCK_RCU_FREE will call this function after one RCU
+ * grace period. This is the case for UDP sockets and TCP listeners.
+ */
+static void __sk_destruct(struct rcu_head *head)
{
+ struct sock *sk = container_of(head, struct sock, sk_rcu);
struct sk_filter *filter;
if (sk->sk_destruct)
@@ -1451,6 +1464,14 @@ void sk_destruct(struct sock *sk)
sk_prot_free(sk->sk_prot_creator, sk);
}
+void sk_destruct(struct sock *sk)
+{
+ if (sock_flag(sk, SOCK_RCU_FREE))
+ call_rcu(&sk->sk_rcu, __sk_destruct);
+ else
+ __sk_destruct(&sk->sk_rcu);
+}
+
static void __sk_free(struct sock *sk)
{
if (unlikely(sock_diag_has_destroy_listeners(sk) && sk->sk_net_refcnt))
@@ -1515,6 +1536,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
newsk->sk_dst_cache = NULL;
newsk->sk_wmem_queued = 0;
newsk->sk_forward_alloc = 0;
+ atomic_set(&newsk->sk_drops, 0);
newsk->sk_send_head = NULL;
newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
@@ -1634,6 +1656,17 @@ void sock_wfree(struct sk_buff *skb)
}
EXPORT_SYMBOL(sock_wfree);
+/* This variant of sock_wfree() is used by TCP,
+ * since it sets SOCK_USE_WRITE_QUEUE.
+ */
+void __sock_wfree(struct sk_buff *skb)
+{
+ struct sock *sk = skb->sk;
+
+ if (atomic_sub_and_test(skb->truesize, &sk->sk_wmem_alloc))
+ __sk_free(sk);
+}
+
void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
{
skb_orphan(skb);
@@ -1656,8 +1689,21 @@ void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
}
EXPORT_SYMBOL(skb_set_owner_w);
+/* This helper is used by netem, as it can hold packets in its
+ * delay queue. We want to allow the owner socket to send more
+ * packets, as if they were already TX completed by a typical driver.
+ * But we also want to keep skb->sk set because some packet schedulers
+ * rely on it (sch_fq for example). So we set skb->truesize to a small
+ * amount (1) and decrease sk_wmem_alloc accordingly.
+ */
void skb_orphan_partial(struct sk_buff *skb)
{
+ /* If this skb is a TCP pure ACK or already went here,
+ * we have nothing to do. 2 is already a very small truesize.
+ */
+ if (skb->truesize <= 2)
+ return;
+
/* TCP stack sets skb->ooo_okay based on sk_wmem_alloc,
* so we do not completely orphan skb, but transfert all
* accounted bytes but one, to avoid unexpected reorders.
@@ -1869,27 +1915,55 @@ struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
}
EXPORT_SYMBOL(sock_alloc_send_skb);
+int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg,
+ struct sockcm_cookie *sockc)
+{
+ u32 tsflags;
+
+ switch (cmsg->cmsg_type) {
+ case SO_MARK:
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+ return -EPERM;
+ if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
+ return -EINVAL;
+ sockc->mark = *(u32 *)CMSG_DATA(cmsg);
+ break;
+ case SO_TIMESTAMPING:
+ if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
+ return -EINVAL;
+
+ tsflags = *(u32 *)CMSG_DATA(cmsg);
+ if (tsflags & ~SOF_TIMESTAMPING_TX_RECORD_MASK)
+ return -EINVAL;
+
+ sockc->tsflags &= ~SOF_TIMESTAMPING_TX_RECORD_MASK;
+ sockc->tsflags |= tsflags;
+ break;
+ /* SCM_RIGHTS and SCM_CREDENTIALS are semantically in SOL_UNIX. */
+ case SCM_RIGHTS:
+ case SCM_CREDENTIALS:
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+EXPORT_SYMBOL(__sock_cmsg_send);
+
int sock_cmsg_send(struct sock *sk, struct msghdr *msg,
struct sockcm_cookie *sockc)
{
struct cmsghdr *cmsg;
+ int ret;
for_each_cmsghdr(cmsg, msg) {
if (!CMSG_OK(msg, cmsg))
return -EINVAL;
if (cmsg->cmsg_level != SOL_SOCKET)
continue;
- switch (cmsg->cmsg_type) {
- case SO_MARK:
- if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
- return -EPERM;
- if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
- return -EINVAL;
- sockc->mark = *(u32 *)CMSG_DATA(cmsg);
- break;
- default:
- return -EINVAL;
- }
+ ret = __sock_cmsg_send(sk, msg, cmsg, sockc);
+ if (ret)
+ return ret;
}
return 0;
}
@@ -1974,33 +2048,27 @@ static void __release_sock(struct sock *sk)
__releases(&sk->sk_lock.slock)
__acquires(&sk->sk_lock.slock)
{
- struct sk_buff *skb = sk->sk_backlog.head;
+ struct sk_buff *skb, *next;
- do {
+ while ((skb = sk->sk_backlog.head) != NULL) {
sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
- bh_unlock_sock(sk);
- do {
- struct sk_buff *next = skb->next;
+ spin_unlock_bh(&sk->sk_lock.slock);
+ do {
+ next = skb->next;
prefetch(next);
WARN_ON_ONCE(skb_dst_is_noref(skb));
skb->next = NULL;
sk_backlog_rcv(sk, skb);
- /*
- * We are in process context here with softirqs
- * disabled, use cond_resched_softirq() to preempt.
- * This is safe to do because we've taken the backlog
- * queue private:
- */
- cond_resched_softirq();
+ cond_resched();
skb = next;
} while (skb != NULL);
- bh_lock_sock(sk);
- } while ((skb = sk->sk_backlog.head) != NULL);
+ spin_lock_bh(&sk->sk_lock.slock);
+ }
/*
* Doing the zeroing here guarantee we can not loop forever
@@ -2009,6 +2077,13 @@ static void __release_sock(struct sock *sk)
sk->sk_backlog.len = 0;
}
+void __sk_flush_backlog(struct sock *sk)
+{
+ spin_lock_bh(&sk->sk_lock.slock);
+ __release_sock(sk);
+ spin_unlock_bh(&sk->sk_lock.slock);
+}
+
/**
* sk_wait_data - wait for data to arrive at sk_receive_queue
* @sk: sock to wait on
@@ -2145,6 +2220,15 @@ void __sk_mem_reclaim(struct sock *sk, int amount)
}
EXPORT_SYMBOL(__sk_mem_reclaim);
+int sk_set_peek_off(struct sock *sk, int val)
+{
+ if (val < 0)
+ return -EINVAL;
+
+ sk->sk_peek_off = val;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(sk_set_peek_off);
/*
* Set of default routines for initialising struct proto_ops when
@@ -2432,11 +2516,6 @@ EXPORT_SYMBOL(lock_sock_nested);
void release_sock(struct sock *sk)
{
- /*
- * The sk_lock has mutex_unlock() semantics:
- */
- mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
-
spin_lock_bh(&sk->sk_lock.slock);
if (sk->sk_backlog.tail)
__release_sock(sk);
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index a996ce8c8..6b10573cc 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -67,6 +67,7 @@ int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attrtype)
mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued;
mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len;
+ mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops);
return nla_put(skb, attrtype, sizeof(mem), &mem);
}
@@ -119,7 +120,7 @@ static size_t sock_diag_nlmsg_size(void)
{
return NLMSG_ALIGN(sizeof(struct inet_diag_msg)
+ nla_total_size(sizeof(u8)) /* INET_DIAG_PROTOCOL */
- + nla_total_size(sizeof(struct tcp_info))); /* INET_DIAG_INFO */
+ + nla_total_size_64bit(sizeof(struct tcp_info))); /* INET_DIAG_INFO */
}
static void sock_diag_broadcast_destroy_work(struct work_struct *work)
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index a6beb7b6a..0df2aa652 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -294,6 +294,15 @@ static struct ctl_table net_core_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
+# ifdef CONFIG_HAVE_EBPF_JIT
+ {
+ .procname = "bpf_jit_harden",
+ .data = &bpf_jit_harden,
+ .maxlen = sizeof(int),
+ .mode = 0600,
+ .proc_handler = proc_dointvec,
+ },
+# endif
#endif
{
.procname = "netdev_tstamp_prequeue",