summaryrefslogtreecommitdiff
path: root/drivers/net/ethernet/mellanox/mlx4
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx4')
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/Kconfig7
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c280
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_ethtool.c34
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_netdev.c190
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_rx.c126
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_tx.c282
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/eq.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/fw.c41
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/fw.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/intf.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/main.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mcg.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mlx4_en.h63
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mr.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/pd.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/port.c12
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/resource_tracker.c22
17 files changed, 924 insertions, 163 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx4/Kconfig b/drivers/net/ethernet/mellanox/mlx4/Kconfig
index 9ca3734eb..5098e7f21 100644
--- a/drivers/net/ethernet/mellanox/mlx4/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx4/Kconfig
@@ -24,13 +24,6 @@ config MLX4_EN_DCB
If unsure, set to Y
-config MLX4_EN_VXLAN
- bool "VXLAN offloads Support"
- default y
- depends on MLX4_EN && VXLAN && !(MLX4_EN=y && VXLAN=m)
- ---help---
- Say Y here if you want to use VXLAN offloads in the driver.
-
config MLX4_CORE
tristate
depends on PCI
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c
index f01918c63..b04760a50 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c
@@ -37,6 +37,11 @@
#include "mlx4_en.h"
#include "fw_qos.h"
+/* CEE DCB state values returned by mlx4_en_dcbnl_get_state(). */
+enum {
+	MLX4_CEE_STATE_DOWN,	/* 0: MLX4_EN_FLAG_DCB_ENABLED is clear */
+	MLX4_CEE_STATE_UP,	/* 1: MLX4_EN_FLAG_DCB_ENABLED is set */
+};
+
/* Definitions for QCN
*/
@@ -80,13 +85,205 @@ struct mlx4_congestion_control_mb_prio_802_1_qau_statistics {
__be32 reserved3[4];
};
+/* dcbnl .getcap: report one DCB capability of the port.
+ *
+ * @dev:   net device being queried
+ * @capid: DCB_CAP_ATTR_* selector
+ * @cap:   out parameter; receives the capability value, or false (0) for
+ *         selectors this driver does not handle
+ *
+ * Always returns 0.
+ */
+static u8 mlx4_en_dcbnl_getcap(struct net_device *dev, int capid, u8 *cap)
+{
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+	int max_tc;
+
+	switch (capid) {
+	case DCB_CAP_ATTR_PFC:
+		*cap = true;
+		break;
+	case DCB_CAP_ATTR_DCBX:
+		*cap = priv->dcbx_cap;
+		break;
+	case DCB_CAP_ATTR_PFC_TCS:
+		/* *cap is only 8 bits wide, so shifting by the raw TC count
+		 * truncates to 0 once the count reaches 8. Report bit (n - 1)
+		 * for n traffic classes instead, clamped to the 1..8 range
+		 * that fits the bitmap.
+		 * NOTE(review): assumes the dcbnl convention that bit (n - 1)
+		 * means "n TCs supported" - confirm against dcbnl.h.
+		 */
+		max_tc = mlx4_max_tc(priv->mdev->dev);
+		if (max_tc > IEEE_8021QAZ_MAX_TCS)
+			max_tc = IEEE_8021QAZ_MAX_TCS;
+		*cap = max_tc > 0 ? 1 << (max_tc - 1) : 0;
+		break;
+	default:
+		*cap = false;
+		break;
+	}
+
+	return 0;
+}
+
+/* dcbnl .getpfcstate: return the PFC state kept in the CEE configuration. */
+static u8 mlx4_en_dcbnl_getpfcstate(struct net_device *netdev)
+{
+	struct mlx4_en_priv *en_priv = netdev_priv(netdev);
+	u8 pfc_state = en_priv->cee_config.pfc_state;
+
+	return pfc_state;
+}
+
+/* dcbnl .setpfcstate: record @state as the PFC state in the CEE
+ * configuration. Only the cached value changes; nothing is written to the
+ * port here.
+ */
+static void mlx4_en_dcbnl_setpfcstate(struct net_device *netdev, u8 state)
+{
+	struct mlx4_en_priv *en_priv = netdev_priv(netdev);
+
+	en_priv->cee_config.pfc_state = state;
+}
+
+/* dcbnl .getpfccfg: copy the cached CEE PFC setting of @priority into
+ * *@setting. @priority indexes cee_config.dcb_pfc[] directly and is not
+ * range-checked here.
+ */
+static void mlx4_en_dcbnl_get_pfc_cfg(struct net_device *netdev, int priority,
+				      u8 *setting)
+{
+	struct mlx4_en_priv *en_priv = netdev_priv(netdev);
+	u8 cached = en_priv->cee_config.dcb_pfc[priority];
+
+	*setting = cached;
+}
+
+/* dcbnl .setpfccfg: cache @setting as the CEE PFC mode of @priority and mark
+ * the PFC state as on. @priority indexes cee_config.dcb_pfc[] directly and is
+ * not range-checked here.
+ */
+static void mlx4_en_dcbnl_set_pfc_cfg(struct net_device *netdev, int priority,
+				      u8 setting)
+{
+	struct mlx4_en_priv *en_priv = netdev_priv(netdev);
+
+	en_priv->cee_config.pfc_state = true;
+	en_priv->cee_config.dcb_pfc[priority] = setting;
+}
+
+/* dcbnl .getnumtcs: report the number of traffic classes for @tcid.
+ *
+ * *@num receives mlx4_max_tc() for DCB_NUMTCS_ATTR_PFC and 0 for any other
+ * @tcid.
+ *
+ * Returns 0 on success, -EINVAL when MLX4_EN_FLAG_DCB_ENABLED is not set.
+ */
+static int mlx4_en_dcbnl_getnumtcs(struct net_device *netdev, int tcid, u8 *num)
+{
+	struct mlx4_en_priv *en_priv = netdev_priv(netdev);
+	bool dcb_on = en_priv->flags & MLX4_EN_FLAG_DCB_ENABLED;
+
+	if (!dcb_on)
+		return -EINVAL;
+
+	*num = (tcid == DCB_NUMTCS_ATTR_PFC) ?
+	       mlx4_max_tc(en_priv->mdev->dev) : 0;
+
+	return 0;
+}
+
+/* dcbnl .setall: push the cached CEE PFC configuration to the port.
+ *
+ * With the PFC state on, global pause is switched off in the port profile and
+ * the per-priority tx/rx PPP bits are rebuilt from cee_config.dcb_pfc[]. With
+ * the PFC state off, global rx/tx pause is switched on and the PPP bits are
+ * left as they were. The resulting profile is written to the port with
+ * mlx4_SET_PORT_general().
+ *
+ * Returns 0 on success, 1 when CEE is not selected in dcbx_cap or when the
+ * port command fails.
+ */
+static u8 mlx4_en_dcbnl_set_all(struct net_device *netdev)
+{
+	struct mlx4_en_priv *priv = netdev_priv(netdev);
+	struct mlx4_en_dev *mdev = priv->mdev;
+	int prio;
+
+	if (!(priv->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+		return 1;
+
+	if (!priv->cee_config.pfc_state) {
+		priv->prof->rx_pause = 1;
+		priv->prof->tx_pause = 1;
+		en_dbg(DRV, priv, "Set pfc off\n");
+	} else {
+		priv->prof->rx_pause = 0;
+		priv->prof->tx_pause = 0;
+
+		for (prio = 0; prio < CEE_DCBX_MAX_PRIO; prio++) {
+			u8 bit = 1 << prio;
+
+			switch (priv->cee_config.dcb_pfc[prio]) {
+			case pfc_enabled_full:
+				priv->prof->tx_ppp |= bit;
+				priv->prof->rx_ppp |= bit;
+				break;
+			case pfc_enabled_tx:
+				priv->prof->tx_ppp |= bit;
+				priv->prof->rx_ppp &= ~bit;
+				break;
+			case pfc_enabled_rx:
+				priv->prof->tx_ppp &= ~bit;
+				priv->prof->rx_ppp |= bit;
+				break;
+			case pfc_disabled:
+				priv->prof->tx_ppp &= ~bit;
+				priv->prof->rx_ppp &= ~bit;
+				break;
+			default:
+				/* Unknown mode: keep the current bits */
+				break;
+			}
+		}
+		en_dbg(DRV, priv, "Set pfc on\n");
+	}
+
+	if (!mlx4_SET_PORT_general(mdev->dev, priv->port,
+				   priv->rx_skb_size + ETH_FCS_LEN,
+				   priv->prof->tx_pause,
+				   priv->prof->tx_ppp,
+				   priv->prof->rx_pause,
+				   priv->prof->rx_ppp))
+		return 0;
+
+	en_err(priv, "Failed setting pause params\n");
+	return 1;
+}
+
+/* dcbnl .getstate: MLX4_CEE_STATE_UP when MLX4_EN_FLAG_DCB_ENABLED is set in
+ * the private flags, MLX4_CEE_STATE_DOWN otherwise.
+ */
+static u8 mlx4_en_dcbnl_get_state(struct net_device *dev)
+{
+	struct mlx4_en_priv *en_priv = netdev_priv(dev);
+
+	return (en_priv->flags & MLX4_EN_FLAG_DCB_ENABLED) ?
+	       MLX4_CEE_STATE_UP : MLX4_CEE_STATE_DOWN;
+}
+
+/* dcbnl .setstate: enable (@state != 0) or disable (@state == 0) CEE DCB.
+ *
+ * Enabling sets MLX4_EN_FLAG_DCB_ENABLED and configures
+ * IEEE_8021QAZ_MAX_TCS traffic classes; disabling clears the flag and
+ * configures zero traffic classes. A request that matches the current flag
+ * state is a no-op.
+ *
+ * Returns 0 on success, 1 when CEE is not selected in dcbx_cap or when
+ * mlx4_en_setup_tc() fails. On failure the flag is restored to its previous
+ * value.
+ */
+static u8 mlx4_en_dcbnl_set_state(struct net_device *dev, u8 state)
+{
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+	int num_tcs = 0;
+
+	if (!(priv->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+		return 1;
+
+	if (!!(state) == !!(priv->flags & MLX4_EN_FLAG_DCB_ENABLED))
+		return 0;
+
+	if (state) {
+		priv->flags |= MLX4_EN_FLAG_DCB_ENABLED;
+		num_tcs = IEEE_8021QAZ_MAX_TCS;
+	} else {
+		priv->flags &= ~MLX4_EN_FLAG_DCB_ENABLED;
+	}
+
+	if (mlx4_en_setup_tc(dev, num_tcs)) {
+		/* The flag was flipped before the TC setup was attempted;
+		 * undo that so mlx4_en_dcbnl_get_state() does not report a
+		 * state that was never applied.
+		 */
+		if (state)
+			priv->flags &= ~MLX4_EN_FLAG_DCB_ENABLED;
+		else
+			priv->flags |= MLX4_EN_FLAG_DCB_ENABLED;
+		return 1;
+	}
+
+	return 0;
+}
+
+/* dcbnl .getapp: look up the application entry (@idtype, @id) through
+ * dcb_getapp().
+ *
+ * A non-zero return value is the 802.1p user priority bitmap. 0 is the
+ * invalid bitmap and signals an error; it is also returned when CEE is not
+ * selected in dcbx_cap.
+ */
+static int mlx4_en_dcbnl_getapp(struct net_device *netdev, u8 idtype, u16 id)
+{
+	struct mlx4_en_priv *priv = netdev_priv(netdev);
+	struct dcb_app app;
+
+	if (!(priv->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+		return 0;
+
+	memset(&app, 0, sizeof(app));
+	app.selector = idtype;
+	app.protocol = id;
+
+	return dcb_getapp(netdev, &app);
+}
+
+/* dcbnl .setapp: register the application entry (@idtype, @id) with user
+ * priority @up through dcb_setapp().
+ *
+ * Returns -EINVAL when CEE is not selected in dcbx_cap, otherwise the result
+ * of dcb_setapp().
+ */
+static int mlx4_en_dcbnl_setapp(struct net_device *netdev, u8 idtype,
+				u16 id, u8 up)
+{
+	struct mlx4_en_priv *priv = netdev_priv(netdev);
+	struct dcb_app app = {
+		.selector = idtype,
+		.protocol = id,
+		.priority = up,
+	};
+
+	if (!(priv->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+		return -EINVAL;
+
+	return dcb_setapp(netdev, &app);
+}
+
static int mlx4_en_dcbnl_ieee_getets(struct net_device *dev,
struct ieee_ets *ets)
{
struct mlx4_en_priv *priv = netdev_priv(dev);
struct ieee_ets *my_ets = &priv->ets;
- /* No IEEE PFC settings available */
if (!my_ets)
return -EINVAL;
@@ -237,18 +434,51 @@ static int mlx4_en_dcbnl_ieee_setpfc(struct net_device *dev,
static u8 mlx4_en_dcbnl_getdcbx(struct net_device *dev)
{
- return DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE;
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+
+ return priv->dcbx_cap;
}
static u8 mlx4_en_dcbnl_setdcbx(struct net_device *dev, u8 mode)
{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct ieee_ets ets = {0};
+ struct ieee_pfc pfc = {0};
+
+ if (mode == priv->dcbx_cap)
+ return 0;
+
if ((mode & DCB_CAP_DCBX_LLD_MANAGED) ||
- (mode & DCB_CAP_DCBX_VER_CEE) ||
- !(mode & DCB_CAP_DCBX_VER_IEEE) ||
+ ((mode & DCB_CAP_DCBX_VER_IEEE) &&
+ (mode & DCB_CAP_DCBX_VER_CEE)) ||
!(mode & DCB_CAP_DCBX_HOST))
- return 1;
+ goto err;
+
+ priv->dcbx_cap = mode;
+
+ ets.ets_cap = IEEE_8021QAZ_MAX_TCS;
+ pfc.pfc_cap = IEEE_8021QAZ_MAX_TCS;
+
+ if (mode & DCB_CAP_DCBX_VER_IEEE) {
+ if (mlx4_en_dcbnl_ieee_setets(dev, &ets))
+ goto err;
+ if (mlx4_en_dcbnl_ieee_setpfc(dev, &pfc))
+ goto err;
+ } else if (mode & DCB_CAP_DCBX_VER_CEE) {
+ if (mlx4_en_dcbnl_set_all(dev))
+ goto err;
+ } else {
+ if (mlx4_en_dcbnl_ieee_setets(dev, &ets))
+ goto err;
+ if (mlx4_en_dcbnl_ieee_setpfc(dev, &pfc))
+ goto err;
+ if (mlx4_en_setup_tc(dev, 0))
+ goto err;
+ }
return 0;
+err:
+ return 1;
}
#define MLX4_RATELIMIT_UNITS_IN_KB 100000 /* rate-limit HW unit in Kbps */
@@ -463,24 +693,46 @@ static int mlx4_en_dcbnl_ieee_getqcnstats(struct net_device *dev,
}
const struct dcbnl_rtnl_ops mlx4_en_dcbnl_ops = {
- .ieee_getets = mlx4_en_dcbnl_ieee_getets,
- .ieee_setets = mlx4_en_dcbnl_ieee_setets,
- .ieee_getmaxrate = mlx4_en_dcbnl_ieee_getmaxrate,
- .ieee_setmaxrate = mlx4_en_dcbnl_ieee_setmaxrate,
- .ieee_getpfc = mlx4_en_dcbnl_ieee_getpfc,
- .ieee_setpfc = mlx4_en_dcbnl_ieee_setpfc,
+ .ieee_getets = mlx4_en_dcbnl_ieee_getets,
+ .ieee_setets = mlx4_en_dcbnl_ieee_setets,
+ .ieee_getmaxrate = mlx4_en_dcbnl_ieee_getmaxrate,
+ .ieee_setmaxrate = mlx4_en_dcbnl_ieee_setmaxrate,
+ .ieee_getqcn = mlx4_en_dcbnl_ieee_getqcn,
+ .ieee_setqcn = mlx4_en_dcbnl_ieee_setqcn,
+ .ieee_getqcnstats = mlx4_en_dcbnl_ieee_getqcnstats,
+ .ieee_getpfc = mlx4_en_dcbnl_ieee_getpfc,
+ .ieee_setpfc = mlx4_en_dcbnl_ieee_setpfc,
+
+ .getstate = mlx4_en_dcbnl_get_state,
+ .setstate = mlx4_en_dcbnl_set_state,
+ .getpfccfg = mlx4_en_dcbnl_get_pfc_cfg,
+ .setpfccfg = mlx4_en_dcbnl_set_pfc_cfg,
+ .setall = mlx4_en_dcbnl_set_all,
+ .getcap = mlx4_en_dcbnl_getcap,
+ .getnumtcs = mlx4_en_dcbnl_getnumtcs,
+ .getpfcstate = mlx4_en_dcbnl_getpfcstate,
+ .setpfcstate = mlx4_en_dcbnl_setpfcstate,
+ .getapp = mlx4_en_dcbnl_getapp,
+ .setapp = mlx4_en_dcbnl_setapp,
.getdcbx = mlx4_en_dcbnl_getdcbx,
.setdcbx = mlx4_en_dcbnl_setdcbx,
- .ieee_getqcn = mlx4_en_dcbnl_ieee_getqcn,
- .ieee_setqcn = mlx4_en_dcbnl_ieee_setqcn,
- .ieee_getqcnstats = mlx4_en_dcbnl_ieee_getqcnstats,
};
const struct dcbnl_rtnl_ops mlx4_en_dcbnl_pfc_ops = {
.ieee_getpfc = mlx4_en_dcbnl_ieee_getpfc,
.ieee_setpfc = mlx4_en_dcbnl_ieee_setpfc,
+ .setstate = mlx4_en_dcbnl_set_state,
+ .getpfccfg = mlx4_en_dcbnl_get_pfc_cfg,
+ .setpfccfg = mlx4_en_dcbnl_set_pfc_cfg,
+ .setall = mlx4_en_dcbnl_set_all,
+ .getnumtcs = mlx4_en_dcbnl_getnumtcs,
+ .getpfcstate = mlx4_en_dcbnl_getpfcstate,
+ .setpfcstate = mlx4_en_dcbnl_setpfcstate,
+ .getapp = mlx4_en_dcbnl_getapp,
+ .setapp = mlx4_en_dcbnl_setapp,
+
.getdcbx = mlx4_en_dcbnl_getdcbx,
.setdcbx = mlx4_en_dcbnl_setdcbx,
};
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index 44cf16d01..bdda17d2e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -1112,7 +1112,7 @@ static u32 mlx4_en_get_rxfh_indir_size(struct net_device *dev)
{
struct mlx4_en_priv *priv = netdev_priv(dev);
- return priv->rx_ring_num;
+ return rounddown_pow_of_two(priv->rx_ring_num);
}
static u32 mlx4_en_get_rxfh_key_size(struct net_device *netdev)
@@ -1146,19 +1146,17 @@ static int mlx4_en_get_rxfh(struct net_device *dev, u32 *ring_index, u8 *key,
u8 *hfunc)
{
struct mlx4_en_priv *priv = netdev_priv(dev);
- struct mlx4_en_rss_map *rss_map = &priv->rss_map;
- int rss_rings;
- size_t n = priv->rx_ring_num;
+ u32 n = mlx4_en_get_rxfh_indir_size(dev);
+ u32 i, rss_rings;
int err = 0;
- rss_rings = priv->prof->rss_rings ?: priv->rx_ring_num;
- rss_rings = 1 << ilog2(rss_rings);
+ rss_rings = priv->prof->rss_rings ?: n;
+ rss_rings = rounddown_pow_of_two(rss_rings);
- while (n--) {
+ for (i = 0; i < n; i++) {
if (!ring_index)
break;
- ring_index[n] = rss_map->qps[n % rss_rings].qpn -
- rss_map->base_qpn;
+ ring_index[i] = i % rss_rings;
}
if (key)
memcpy(key, priv->rss_key, MLX4_EN_RSS_KEY_SIZE);
@@ -1171,6 +1169,7 @@ static int mlx4_en_set_rxfh(struct net_device *dev, const u32 *ring_index,
const u8 *key, const u8 hfunc)
{
struct mlx4_en_priv *priv = netdev_priv(dev);
+ u32 n = mlx4_en_get_rxfh_indir_size(dev);
struct mlx4_en_dev *mdev = priv->mdev;
int port_up = 0;
int err = 0;
@@ -1180,18 +1179,18 @@ static int mlx4_en_set_rxfh(struct net_device *dev, const u32 *ring_index,
/* Calculate RSS table size and make sure flows are spread evenly
* between rings
*/
- for (i = 0; i < priv->rx_ring_num; i++) {
+ for (i = 0; i < n; i++) {
if (!ring_index)
- continue;
+ break;
if (i > 0 && !ring_index[i] && !rss_rings)
rss_rings = i;
- if (ring_index[i] != (i % (rss_rings ?: priv->rx_ring_num)))
+ if (ring_index[i] != (i % (rss_rings ?: n)))
return -EINVAL;
}
if (!rss_rings)
- rss_rings = priv->rx_ring_num;
+ rss_rings = n;
/* RSS table size must be an order of 2 */
if (!is_power_of_2(rss_rings))
@@ -1730,6 +1729,12 @@ static int mlx4_en_set_channels(struct net_device *dev,
!channel->tx_count || !channel->rx_count)
return -EINVAL;
+ if (channel->tx_count * MLX4_EN_NUM_UP <= priv->xdp_ring_num) {
+ en_err(priv, "Minimum %d tx channels required with XDP on\n",
+ priv->xdp_ring_num / MLX4_EN_NUM_UP + 1);
+ return -EINVAL;
+ }
+
tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
if (!tmp)
return -ENOMEM;
@@ -1751,7 +1756,8 @@ static int mlx4_en_set_channels(struct net_device *dev,
mlx4_en_safe_replace_resources(priv, tmp);
- netif_set_real_num_tx_queues(dev, priv->tx_ring_num);
+ netif_set_real_num_tx_queues(dev, priv->tx_ring_num -
+ priv->xdp_ring_num);
netif_set_real_num_rx_queues(dev, priv->rx_ring_num);
if (dev->num_tc)
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 8359e9e51..fedb82927 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -31,6 +31,7 @@
*
*/
+#include <linux/bpf.h>
#include <linux/etherdevice.h>
#include <linux/tcp.h>
#include <linux/if_vlan.h>
@@ -67,6 +68,18 @@ int mlx4_en_setup_tc(struct net_device *dev, u8 up)
offset += priv->num_tx_rings_p_up;
}
+#ifdef CONFIG_MLX4_EN_DCB
+ if (!mlx4_is_slave(priv->mdev->dev)) {
+ if (up) {
+ if (priv->dcbx_cap)
+ priv->flags |= MLX4_EN_FLAG_DCB_ENABLED;
+ } else {
+ priv->flags &= ~MLX4_EN_FLAG_DCB_ENABLED;
+ priv->cee_config.pfc_state = false;
+ }
+ }
+#endif /* CONFIG_MLX4_EN_DCB */
+
return 0;
}
@@ -1201,8 +1214,8 @@ static void mlx4_en_netpoll(struct net_device *dev)
struct mlx4_en_cq *cq;
int i;
- for (i = 0; i < priv->rx_ring_num; i++) {
- cq = priv->rx_cq[i];
+ for (i = 0; i < priv->tx_ring_num; i++) {
+ cq = priv->tx_cq[i];
napi_schedule(&cq->napi);
}
}
@@ -1510,6 +1523,24 @@ static void mlx4_en_free_affinity_hint(struct mlx4_en_priv *priv, int ring_idx)
free_cpumask_var(priv->rx_ring[ring_idx]->affinity_mask);
}
+/* Pair tx ring @tx_ring_idx with an rx ring for page recycling.
+ *
+ * The last priv->xdp_ring_num tx rings map onto rx rings 0..xdp_ring_num-1.
+ * Such a tx ring gets mlx4_en_recycle_tx_desc() as its completion handler and
+ * the matching rx ring as recycle target. Every other tx ring only has its
+ * recycle_ring pointer cleared.
+ */
+static void mlx4_en_init_recycle_ring(struct mlx4_en_priv *priv,
+				      int tx_ring_idx)
+{
+	struct mlx4_en_tx_ring *txr = priv->tx_ring[tx_ring_idx];
+	int rx_idx = (priv->xdp_ring_num - priv->tx_ring_num) + tx_ring_idx;
+
+	if (rx_idx < 0) {
+		/* Regular tx ring: nothing to recycle into */
+		txr->recycle_ring = NULL;
+		return;
+	}
+
+	txr->free_tx_desc = mlx4_en_recycle_tx_desc;
+	txr->recycle_ring = priv->rx_ring[rx_idx];
+	en_dbg(DRV, priv,
+	       "Set tx_ring[%d]->recycle_ring = rx_ring[%d]\n",
+	       tx_ring_idx, rx_idx);
+}
+
int mlx4_en_start_port(struct net_device *dev)
{
struct mlx4_en_priv *priv = netdev_priv(dev);
@@ -1632,6 +1663,8 @@ int mlx4_en_start_port(struct net_device *dev)
}
tx_ring->tx_queue = netdev_get_tx_queue(dev, i);
+ mlx4_en_init_recycle_ring(priv, i);
+
/* Arm CQ for TX completions */
mlx4_en_arm_cq(priv, cq);
@@ -1696,10 +1729,9 @@ int mlx4_en_start_port(struct net_device *dev)
/* Schedule multicast task to populate multicast list */
queue_work(mdev->workqueue, &priv->rx_mode_task);
-#ifdef CONFIG_MLX4_EN_VXLAN
if (priv->mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN)
- vxlan_get_rx_port(dev);
-#endif
+ udp_tunnel_get_rx_info(dev);
+
priv->port_up = true;
netif_tx_start_all_queues(dev);
netif_device_attach(dev);
@@ -2177,6 +2209,11 @@ static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu)
en_err(priv, "Bad MTU size:%d.\n", new_mtu);
return -EPERM;
}
+ if (priv->xdp_ring_num && MLX4_EN_EFF_MTU(new_mtu) > FRAG_SZ0) {
+ en_err(priv, "MTU size:%d requires frags but XDP running\n",
+ new_mtu);
+ return -EOPNOTSUPP;
+ }
dev->mtu = new_mtu;
if (netif_running(dev)) {
@@ -2434,7 +2471,6 @@ static int mlx4_en_get_phys_port_id(struct net_device *dev,
return 0;
}
-#ifdef CONFIG_MLX4_EN_VXLAN
static void mlx4_en_add_vxlan_offloads(struct work_struct *work)
{
int ret;
@@ -2484,15 +2520,19 @@ static void mlx4_en_del_vxlan_offloads(struct work_struct *work)
}
static void mlx4_en_add_vxlan_port(struct net_device *dev,
- sa_family_t sa_family, __be16 port)
+ struct udp_tunnel_info *ti)
{
struct mlx4_en_priv *priv = netdev_priv(dev);
+ __be16 port = ti->port;
__be16 current_port;
- if (priv->mdev->dev->caps.tunnel_offload_mode != MLX4_TUNNEL_OFFLOAD_MODE_VXLAN)
+ if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
return;
- if (sa_family == AF_INET6)
+ if (ti->sa_family != AF_INET)
+ return;
+
+ if (priv->mdev->dev->caps.tunnel_offload_mode != MLX4_TUNNEL_OFFLOAD_MODE_VXLAN)
return;
current_port = priv->vxlan_port;
@@ -2507,15 +2547,19 @@ static void mlx4_en_add_vxlan_port(struct net_device *dev,
}
static void mlx4_en_del_vxlan_port(struct net_device *dev,
- sa_family_t sa_family, __be16 port)
+ struct udp_tunnel_info *ti)
{
struct mlx4_en_priv *priv = netdev_priv(dev);
+ __be16 port = ti->port;
__be16 current_port;
- if (priv->mdev->dev->caps.tunnel_offload_mode != MLX4_TUNNEL_OFFLOAD_MODE_VXLAN)
+ if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
return;
- if (sa_family == AF_INET6)
+ if (ti->sa_family != AF_INET)
+ return;
+
+ if (priv->mdev->dev->caps.tunnel_offload_mode != MLX4_TUNNEL_OFFLOAD_MODE_VXLAN)
return;
current_port = priv->vxlan_port;
@@ -2550,7 +2594,6 @@ static netdev_features_t mlx4_en_features_check(struct sk_buff *skb,
return features;
}
-#endif
static int mlx4_en_set_tx_maxrate(struct net_device *dev, int queue_index, u32 maxrate)
{
@@ -2579,6 +2622,103 @@ static int mlx4_en_set_tx_maxrate(struct net_device *dev, int queue_index, u32 m
return err;
}
+/* Attach, replace or detach (@prog == NULL) the XDP program on every rx ring.
+ *
+ * When the number of XDP tx rings stays the same, only the per-ring program
+ * pointers are exchanged. Otherwise the request is validated (single-frag
+ * buffers, enough tx rings) and then, under mdev->state_lock, the port is
+ * stopped if it was up, xdp_ring_num and the real tx queue count are updated,
+ * the programs are exchanged and the port is started again.
+ *
+ * rx_ring_num - 1 extra references on @prog are taken with bpf_prog_add()
+ * (presumably on top of one the caller already holds), and every replaced
+ * program pointer is released with bpf_prog_put().
+ *
+ * Returns 0 on success. 0 is also returned when restarting the port fails;
+ * in that case the watchdog task is queued. Validation failures return
+ * -EOPNOTSUPP or -EINVAL, and a bpf_prog_add() failure returns its error.
+ */
+static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog)
+{
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+	struct mlx4_en_dev *mdev = priv->mdev;
+	struct bpf_prog *prev;
+	int new_xdp_rings = 0;
+	bool restart = false;
+	int ring;
+
+	if (prog)
+		new_xdp_rings = ALIGN(priv->rx_ring_num, MLX4_EN_NUM_UP);
+
+	/* No need to reconfigure buffers when simply swapping the
+	 * program for a new one.
+	 */
+	if (priv->xdp_ring_num == new_xdp_rings) {
+		if (prog) {
+			prog = bpf_prog_add(prog, priv->rx_ring_num - 1);
+			if (IS_ERR(prog))
+				return PTR_ERR(prog);
+		}
+		for (ring = 0; ring < priv->rx_ring_num; ring++) {
+			/* This xchg is paired with READ_ONCE in the fastpath */
+			prev = xchg(&priv->rx_ring[ring]->xdp_prog, prog);
+			if (prev)
+				bpf_prog_put(prev);
+		}
+		return 0;
+	}
+
+	if (priv->num_frags > 1) {
+		en_err(priv, "Cannot set XDP if MTU requires multiple frags\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (priv->tx_ring_num < new_xdp_rings + MLX4_EN_NUM_UP) {
+		en_err(priv,
+		       "Minimum %d tx channels required to run XDP\n",
+		       (new_xdp_rings + MLX4_EN_NUM_UP) / MLX4_EN_NUM_UP);
+		return -EINVAL;
+	}
+
+	if (prog) {
+		prog = bpf_prog_add(prog, priv->rx_ring_num - 1);
+		if (IS_ERR(prog))
+			return PTR_ERR(prog);
+	}
+
+	mutex_lock(&mdev->state_lock);
+	if (priv->port_up) {
+		restart = true;
+		mlx4_en_stop_port(dev, 1);
+	}
+
+	priv->xdp_ring_num = new_xdp_rings;
+	netif_set_real_num_tx_queues(dev, priv->tx_ring_num -
+					  priv->xdp_ring_num);
+
+	for (ring = 0; ring < priv->rx_ring_num; ring++) {
+		prev = xchg(&priv->rx_ring[ring]->xdp_prog, prog);
+		if (prev)
+			bpf_prog_put(prev);
+	}
+
+	if (restart && mlx4_en_start_port(dev)) {
+		en_err(priv, "Failed starting port %d for XDP change\n",
+		       priv->port);
+		queue_work(mdev->workqueue, &priv->watchdog_task);
+	}
+
+	mutex_unlock(&mdev->state_lock);
+	return 0;
+}
+
+/* True when XDP tx rings are reserved, i.e. priv->xdp_ring_num is non-zero. */
+static bool mlx4_xdp_attached(struct net_device *dev)
+{
+	struct mlx4_en_priv *en_priv = netdev_priv(dev);
+
+	return en_priv->xdp_ring_num != 0;
+}
+
+/* .ndo_xdp entry point: dispatch on xdp->command.
+ *
+ * XDP_SETUP_PROG installs xdp->prog through mlx4_xdp_set() and returns its
+ * result. XDP_QUERY_PROG stores the attach status in xdp->prog_attached and
+ * returns 0. Any other command returns -EINVAL.
+ */
+static int mlx4_xdp(struct net_device *dev, struct netdev_xdp *xdp)
+{
+	if (xdp->command == XDP_SETUP_PROG)
+		return mlx4_xdp_set(dev, xdp->prog);
+
+	if (xdp->command == XDP_QUERY_PROG) {
+		xdp->prog_attached = mlx4_xdp_attached(dev);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
static const struct net_device_ops mlx4_netdev_ops = {
.ndo_open = mlx4_en_open,
.ndo_stop = mlx4_en_close,
@@ -2603,12 +2743,11 @@ static const struct net_device_ops mlx4_netdev_ops = {
.ndo_rx_flow_steer = mlx4_en_filter_rfs,
#endif
.ndo_get_phys_port_id = mlx4_en_get_phys_port_id,
-#ifdef CONFIG_MLX4_EN_VXLAN
- .ndo_add_vxlan_port = mlx4_en_add_vxlan_port,
- .ndo_del_vxlan_port = mlx4_en_del_vxlan_port,
+ .ndo_udp_tunnel_add = mlx4_en_add_vxlan_port,
+ .ndo_udp_tunnel_del = mlx4_en_del_vxlan_port,
.ndo_features_check = mlx4_en_features_check,
-#endif
.ndo_set_tx_maxrate = mlx4_en_set_tx_maxrate,
+ .ndo_xdp = mlx4_xdp,
};
static const struct net_device_ops mlx4_netdev_ops_master = {
@@ -2641,12 +2780,11 @@ static const struct net_device_ops mlx4_netdev_ops_master = {
.ndo_rx_flow_steer = mlx4_en_filter_rfs,
#endif
.ndo_get_phys_port_id = mlx4_en_get_phys_port_id,
-#ifdef CONFIG_MLX4_EN_VXLAN
- .ndo_add_vxlan_port = mlx4_en_add_vxlan_port,
- .ndo_del_vxlan_port = mlx4_en_del_vxlan_port,
+ .ndo_udp_tunnel_add = mlx4_en_add_vxlan_port,
+ .ndo_udp_tunnel_del = mlx4_en_del_vxlan_port,
.ndo_features_check = mlx4_en_features_check,
-#endif
.ndo_set_tx_maxrate = mlx4_en_set_tx_maxrate,
+ .ndo_xdp = mlx4_xdp,
};
struct mlx4_en_bond {
@@ -2936,10 +3074,8 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
INIT_WORK(&priv->linkstate_task, mlx4_en_linkstate);
INIT_DELAYED_WORK(&priv->stats_task, mlx4_en_do_get_stats);
INIT_DELAYED_WORK(&priv->service_task, mlx4_en_service_task);
-#ifdef CONFIG_MLX4_EN_VXLAN
INIT_WORK(&priv->vxlan_add_task, mlx4_en_add_vxlan_offloads);
INIT_WORK(&priv->vxlan_del_task, mlx4_en_del_vxlan_offloads);
-#endif
#ifdef CONFIG_RFS_ACCEL
INIT_LIST_HEAD(&priv->filters);
spin_lock_init(&priv->filters_lock);
@@ -2979,6 +3115,14 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
priv->msg_enable = MLX4_EN_MSG_LEVEL;
#ifdef CONFIG_MLX4_EN_DCB
if (!mlx4_is_slave(priv->mdev->dev)) {
+ priv->dcbx_cap = DCB_CAP_DCBX_VER_CEE | DCB_CAP_DCBX_HOST |
+ DCB_CAP_DCBX_VER_IEEE;
+ priv->flags |= MLX4_EN_DCB_ENABLED;
+ priv->cee_config.pfc_state = false;
+
+ for (i = 0; i < MLX4_EN_NUM_UP; i++)
+ priv->cee_config.dcb_pfc[i] = pfc_disabled;
+
if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETS_CFG) {
dev->dcbnl_ops = &mlx4_en_dcbnl_ops;
} else {
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index 99b5407f2..2040dad86 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -32,6 +32,7 @@
*/
#include <net/busy_poll.h>
+#include <linux/bpf.h>
#include <linux/mlx4/cq.h>
#include <linux/slab.h>
#include <linux/mlx4/qp.h>
@@ -57,7 +58,7 @@ static int mlx4_alloc_pages(struct mlx4_en_priv *priv,
struct page *page;
dma_addr_t dma;
- for (order = MLX4_EN_ALLOC_PREFER_ORDER; ;) {
+ for (order = frag_info->order; ;) {
gfp_t gfp = _gfp;
if (order)
@@ -70,7 +71,7 @@ static int mlx4_alloc_pages(struct mlx4_en_priv *priv,
return -ENOMEM;
}
dma = dma_map_page(priv->ddev, page, 0, PAGE_SIZE << order,
- PCI_DMA_FROMDEVICE);
+ frag_info->dma_dir);
if (dma_mapping_error(priv->ddev, dma)) {
put_page(page);
return -ENOMEM;
@@ -124,7 +125,8 @@ out:
while (i--) {
if (page_alloc[i].page != ring_alloc[i].page) {
dma_unmap_page(priv->ddev, page_alloc[i].dma,
- page_alloc[i].page_size, PCI_DMA_FROMDEVICE);
+ page_alloc[i].page_size,
+ priv->frag_info[i].dma_dir);
page = page_alloc[i].page;
/* Revert changes done by mlx4_alloc_pages */
page_ref_sub(page, page_alloc[i].page_size /
@@ -145,7 +147,7 @@ static void mlx4_en_free_frag(struct mlx4_en_priv *priv,
if (next_frag_end > frags[i].page_size)
dma_unmap_page(priv->ddev, frags[i].dma, frags[i].page_size,
- PCI_DMA_FROMDEVICE);
+ frag_info->dma_dir);
if (frags[i].page)
put_page(frags[i].page);
@@ -176,7 +178,8 @@ out:
page_alloc = &ring->page_alloc[i];
dma_unmap_page(priv->ddev, page_alloc->dma,
- page_alloc->page_size, PCI_DMA_FROMDEVICE);
+ page_alloc->page_size,
+ priv->frag_info[i].dma_dir);
page = page_alloc->page;
/* Revert changes done by mlx4_alloc_pages */
page_ref_sub(page, page_alloc->page_size /
@@ -201,7 +204,7 @@ static void mlx4_en_destroy_allocator(struct mlx4_en_priv *priv,
i, page_count(page_alloc->page));
dma_unmap_page(priv->ddev, page_alloc->dma,
- page_alloc->page_size, PCI_DMA_FROMDEVICE);
+ page_alloc->page_size, frag_info->dma_dir);
while (page_alloc->page_offset + frag_info->frag_stride <
page_alloc->page_size) {
put_page(page_alloc->page);
@@ -244,6 +247,12 @@ static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv,
struct mlx4_en_rx_alloc *frags = ring->rx_info +
(index << priv->log_rx_info);
+ if (ring->page_cache.index > 0) {
+ frags[0] = ring->page_cache.buf[--ring->page_cache.index];
+ rx_desc->data[0].addr = cpu_to_be64(frags[0].dma);
+ return 0;
+ }
+
return mlx4_en_alloc_frags(priv, rx_desc, frags, ring->page_alloc, gfp);
}
@@ -502,13 +511,35 @@ void mlx4_en_recover_from_oom(struct mlx4_en_priv *priv)
}
}
+/* Stash a released frame in the rx ring's small page cache.
+ *
+ * In page-per-packet mode a frame that is done can be kept mapped and reused
+ * for a later rx descriptor, which avoids both the DMA unmap and the page
+ * allocator. In bpf prog performance scenarios buffers are either forwarded
+ * or dropped and never turned into skbs, so every page can come straight from
+ * this cache when it is sized to be a multiple of the napi budget.
+ *
+ * Returns true when @frame was copied into the cache, false when the cache
+ * already holds MLX4_EN_CACHE_SIZE entries; in that case @frame is untouched.
+ */
+bool mlx4_en_rx_recycle(struct mlx4_en_rx_ring *ring,
+			struct mlx4_en_rx_alloc *frame)
+{
+	struct mlx4_en_page_cache *pc = &ring->page_cache;
+
+	if (pc->index < MLX4_EN_CACHE_SIZE) {
+		pc->buf[pc->index++] = *frame;
+		return true;
+	}
+
+	return false;
+}
+
void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
struct mlx4_en_rx_ring **pring,
u32 size, u16 stride)
{
struct mlx4_en_dev *mdev = priv->mdev;
struct mlx4_en_rx_ring *ring = *pring;
+ struct bpf_prog *old_prog;
+ old_prog = READ_ONCE(ring->xdp_prog);
+ if (old_prog)
+ bpf_prog_put(old_prog);
mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE);
vfree(ring->rx_info);
ring->rx_info = NULL;
@@ -519,6 +550,16 @@ void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv,
struct mlx4_en_rx_ring *ring)
{
+ int i;
+
+ for (i = 0; i < ring->page_cache.index; i++) {
+ struct mlx4_en_rx_alloc *frame = &ring->page_cache.buf[i];
+
+ dma_unmap_page(priv->ddev, frame->dma, frame->page_size,
+ priv->frag_info[0].dma_dir);
+ put_page(frame->page);
+ }
+ ring->page_cache.index = 0;
mlx4_en_free_rx_buf(priv, ring);
if (ring->stride <= TXBB_SIZE)
ring->buf -= TXBB_SIZE;
@@ -740,7 +781,10 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
struct mlx4_en_rx_ring *ring = priv->rx_ring[cq->ring];
struct mlx4_en_rx_alloc *frags;
struct mlx4_en_rx_desc *rx_desc;
+ struct bpf_prog *xdp_prog;
+ int doorbell_pending;
struct sk_buff *skb;
+ int tx_index;
int index;
int nr;
unsigned int length;
@@ -756,6 +800,10 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
if (budget <= 0)
return polled;
+ xdp_prog = READ_ONCE(ring->xdp_prog);
+ doorbell_pending = 0;
+ tx_index = (priv->tx_ring_num - priv->xdp_ring_num) + cq->ring;
+
/* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx
* descriptor offset can be deduced from the CQE index instead of
* reading 'cqe->index' */
@@ -832,6 +880,43 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
l2_tunnel = (dev->hw_enc_features & NETIF_F_RXCSUM) &&
(cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_L2_TUNNEL));
+ /* A bpf program gets first chance to drop the packet. It may
+ * read bytes but not past the end of the frag.
+ */
+ if (xdp_prog) {
+ struct xdp_buff xdp;
+ dma_addr_t dma;
+ u32 act;
+
+ dma = be64_to_cpu(rx_desc->data[0].addr);
+ dma_sync_single_for_cpu(priv->ddev, dma,
+ priv->frag_info[0].frag_size,
+ DMA_FROM_DEVICE);
+
+ xdp.data = page_address(frags[0].page) +
+ frags[0].page_offset;
+ xdp.data_end = xdp.data + length;
+
+ act = bpf_prog_run_xdp(xdp_prog, &xdp);
+ switch (act) {
+ case XDP_PASS:
+ break;
+ case XDP_TX:
+ if (!mlx4_en_xmit_frame(frags, dev,
+ length, tx_index,
+ &doorbell_pending))
+ goto consumed;
+ break;
+ default:
+ bpf_warn_invalid_xdp_action(act);
+ case XDP_ABORTED:
+ case XDP_DROP:
+ if (mlx4_en_rx_recycle(ring, frags))
+ goto consumed;
+ goto next;
+ }
+ }
+
if (likely(dev->features & NETIF_F_RXCSUM)) {
if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_TCP |
MLX4_CQE_STATUS_UDP)) {
@@ -983,6 +1068,7 @@ next:
for (nr = 0; nr < priv->num_frags; nr++)
mlx4_en_free_frag(priv, frags, nr);
+consumed:
++cq->mcq.cons_index;
index = (cq->mcq.cons_index) & ring->size_mask;
cqe = mlx4_en_get_cqe(cq->buf, index, priv->cqe_size) + factor;
@@ -991,6 +1077,9 @@ next:
}
out:
+ if (doorbell_pending)
+ mlx4_en_xmit_doorbell(priv->tx_ring[tx_index]);
+
AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled);
mlx4_cq_set_ci(&cq->mcq);
wmb(); /* ensure HW sees CQ consumer before we post new buffers */
@@ -1058,22 +1147,35 @@ static const int frag_sizes[] = {
void mlx4_en_calc_rx_buf(struct net_device *dev)
{
+ enum dma_data_direction dma_dir = PCI_DMA_FROMDEVICE;
struct mlx4_en_priv *priv = netdev_priv(dev);
- /* VLAN_HLEN is added twice,to support skb vlan tagged with multiple
- * headers. (For example: ETH_P_8021Q and ETH_P_8021AD).
- */
- int eff_mtu = dev->mtu + ETH_HLEN + (2 * VLAN_HLEN);
+ int eff_mtu = MLX4_EN_EFF_MTU(dev->mtu);
+ int order = MLX4_EN_ALLOC_PREFER_ORDER;
+ u32 align = SMP_CACHE_BYTES;
int buf_size = 0;
int i = 0;
+ /* bpf requires buffers to be set up as 1 packet per page.
+ * This only works when num_frags == 1.
+ */
+ if (priv->xdp_ring_num) {
+ dma_dir = PCI_DMA_BIDIRECTIONAL;
+ /* This will gain efficient xdp frame recycling at the expense
+ * of more costly truesize accounting
+ */
+ align = PAGE_SIZE;
+ order = 0;
+ }
+
while (buf_size < eff_mtu) {
+ priv->frag_info[i].order = order;
priv->frag_info[i].frag_size =
(eff_mtu > buf_size + frag_sizes[i]) ?
frag_sizes[i] : eff_mtu - buf_size;
priv->frag_info[i].frag_prefix_size = buf_size;
priv->frag_info[i].frag_stride =
- ALIGN(priv->frag_info[i].frag_size,
- SMP_CACHE_BYTES);
+ ALIGN(priv->frag_info[i].frag_size, align);
+ priv->frag_info[i].dma_dir = dma_dir;
buf_size += priv->frag_info[i].frag_size;
i++;
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 76aa4d271..e2509bba3 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -196,6 +196,7 @@ int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv,
ring->last_nr_txbb = 1;
memset(ring->tx_info, 0, ring->size * sizeof(struct mlx4_en_tx_info));
memset(ring->buf, 0, ring->buf_size);
+ ring->free_tx_desc = mlx4_en_free_tx_desc;
ring->qp_state = MLX4_QP_STATE_RST;
ring->doorbell_qpn = cpu_to_be32(ring->qp.qpn << 8);
@@ -265,10 +266,10 @@ static void mlx4_en_stamp_wqe(struct mlx4_en_priv *priv,
}
-static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
- struct mlx4_en_tx_ring *ring,
- int index, u8 owner, u64 timestamp,
- int napi_mode)
+u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
+ struct mlx4_en_tx_ring *ring,
+ int index, u8 owner, u64 timestamp,
+ int napi_mode)
{
struct mlx4_en_tx_info *tx_info = &ring->tx_info[index];
struct mlx4_en_tx_desc *tx_desc = ring->buf + index * TXBB_SIZE;
@@ -344,6 +345,27 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
return tx_info->nr_txbb;
}
+/* Tx completion handler for rings whose buffers are rx pages.
+ *
+ * The completed descriptor's page is offered to ring->recycle_ring through
+ * mlx4_en_rx_recycle(). If the cache refuses it, the page is DMA-unmapped
+ * and released with put_page(). @owner, @timestamp and @napi_mode are not
+ * used here.
+ *
+ * Returns the number of TXBBs recorded for the descriptor.
+ */
+u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv,
+			    struct mlx4_en_tx_ring *ring,
+			    int index, u8 owner, u64 timestamp,
+			    int napi_mode)
+{
+	struct mlx4_en_tx_info *info = &ring->tx_info[index];
+	struct mlx4_en_rx_alloc frame = {
+		.page_size = PAGE_SIZE,
+		.page_offset = 0,
+		.dma = info->map0_dma,
+		.page = info->page,
+	};
+
+	if (mlx4_en_rx_recycle(ring->recycle_ring, &frame))
+		return info->nr_txbb;
+
+	/* Cache full: give the page back */
+	dma_unmap_page(priv->ddev, info->map0_dma,
+		       PAGE_SIZE, priv->frag_info[0].dma_dir);
+	put_page(info->page);
+
+	return info->nr_txbb;
+}
int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring)
{
@@ -362,7 +384,7 @@ int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring)
}
while (ring->cons != ring->prod) {
- ring->last_nr_txbb = mlx4_en_free_tx_desc(priv, ring,
+ ring->last_nr_txbb = ring->free_tx_desc(priv, ring,
ring->cons & ring->size_mask,
!!(ring->cons & ring->size), 0,
0 /* Non-NAPI caller */);
@@ -444,7 +466,7 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev,
timestamp = mlx4_en_get_cqe_ts(cqe);
/* free next descriptor */
- last_nr_txbb = mlx4_en_free_tx_desc(
+ last_nr_txbb = ring->free_tx_desc(
priv, ring, ring_index,
!!((ring_cons + txbbs_skipped) &
ring->size), timestamp, napi_budget);
@@ -476,6 +498,9 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev,
ACCESS_ONCE(ring->last_nr_txbb) = last_nr_txbb;
ACCESS_ONCE(ring->cons) = ring_cons + txbbs_skipped;
+ if (ring->free_tx_desc == mlx4_en_recycle_tx_desc)
+ return done < budget;
+
netdev_tx_completed_queue(ring->tx_queue, packets, bytes);
/* Wakeup Tx queue if this stopped, and ring is not full.
@@ -631,8 +656,7 @@ static int get_real_size(const struct sk_buff *skb,
static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc,
const struct sk_buff *skb,
const struct skb_shared_info *shinfo,
- int real_size, u16 *vlan_tag,
- int tx_ind, void *fragptr)
+ void *fragptr)
{
struct mlx4_wqe_inline_seg *inl = &tx_desc->inl;
int spc = MLX4_INLINE_ALIGN - CTRL_SIZE - sizeof *inl;
@@ -700,10 +724,66 @@ static void mlx4_bf_copy(void __iomem *dst, const void *src,
__iowrite64_copy(dst, src, bytecnt / 8);
}
+/* Ring the send doorbell for @ring: after a write barrier, write
+ * ring->doorbell_qpn to the MLX4_SEND_DOORBELL offset of the ring's UAR
+ * mapping (ring->bf.uar->map).
+ */
+void mlx4_en_xmit_doorbell(struct mlx4_en_tx_ring *ring)
+{
+ wmb();
+ /* Since there is no iowrite*_native() that writes the
+ * value as is, without byteswapping - using the one
+ * that doesn't do byteswapping in the relevant arch
+ * endianness.
+ */
+#if defined(__LITTLE_ENDIAN)
+ iowrite32(
+#else
+ iowrite32be(
+#endif
+ ring->doorbell_qpn,
+ ring->bf.uar->map + MLX4_SEND_DOORBELL);
+}
+
+/* Finish a Tx descriptor and hand it to the hardware.
+ *
+ * Stores @qpn_vlan into the control segment, then publishes @op_own as the
+ * descriptor's owner_opcode behind a dma_wmb().
+ *
+ * When @bf_ok is true the low 16 bits of @bf_index are folded into @op_own
+ * and @desc_size bytes of the descriptor are copied to the ring's bf register
+ * window; no separate doorbell is written on that path.
+ * When @bf_ok is false the doorbell is rung only if @send_doorbell is set;
+ * otherwise ring->xmit_more is incremented.
+ */
+static void mlx4_en_tx_write_desc(struct mlx4_en_tx_ring *ring,
+ struct mlx4_en_tx_desc *tx_desc,
+ union mlx4_wqe_qpn_vlan qpn_vlan,
+ int desc_size, int bf_index,
+ __be32 op_own, bool bf_ok,
+ bool send_doorbell)
+{
+ tx_desc->ctrl.qpn_vlan = qpn_vlan;
+
+ if (bf_ok) {
+ op_own |= htonl((bf_index & 0xffff) << 8);
+ /* Ensure new descriptor hits memory
+ * before setting ownership of this descriptor to HW
+ */
+ dma_wmb();
+ tx_desc->ctrl.owner_opcode = op_own;
+
+ wmb();
+
+ mlx4_bf_copy(ring->bf.reg + ring->bf.offset, &tx_desc->ctrl,
+ desc_size);
+
+ wmb();
+
+ /* Alternate between the two halves of the bf window. */
+ ring->bf.offset ^= ring->bf.buf_size;
+ } else {
+ /* Ensure new descriptor hits memory
+ * before setting ownership of this descriptor to HW
+ */
+ dma_wmb();
+ tx_desc->ctrl.owner_opcode = op_own;
+ if (send_doorbell)
+ mlx4_en_xmit_doorbell(ring);
+ else
+ ring->xmit_more++;
+ }
+}
+
netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct skb_shared_info *shinfo = skb_shinfo(skb);
struct mlx4_en_priv *priv = netdev_priv(dev);
+ union mlx4_wqe_qpn_vlan qpn_vlan = {};
struct device *ddev = priv->ddev;
struct mlx4_en_tx_ring *ring;
struct mlx4_en_tx_desc *tx_desc;
@@ -715,7 +795,6 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
int real_size;
u32 index, bf_index;
__be32 op_own;
- u16 vlan_tag = 0;
u16 vlan_proto = 0;
int i_frag;
int lso_header_size;
@@ -725,6 +804,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
bool stop_queue;
bool inline_ok;
u32 ring_cons;
+ bool bf_ok;
tx_ind = skb_get_queue_mapping(skb);
ring = priv->tx_ring[tx_ind];
@@ -738,7 +818,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
real_size = get_real_size(skb, shinfo, dev, &lso_header_size,
&inline_ok, &fragptr);
if (unlikely(!real_size))
- goto tx_drop;
+ goto tx_drop_count;
/* Align descriptor to TXBB size */
desc_size = ALIGN(real_size, TXBB_SIZE);
@@ -746,12 +826,20 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
if (unlikely(nr_txbb > MAX_DESC_TXBBS)) {
if (netif_msg_tx_err(priv))
en_warn(priv, "Oversized header or SG list\n");
- goto tx_drop;
+ goto tx_drop_count;
}
+ bf_ok = ring->bf_enabled;
if (skb_vlan_tag_present(skb)) {
- vlan_tag = skb_vlan_tag_get(skb);
+ qpn_vlan.vlan_tag = cpu_to_be16(skb_vlan_tag_get(skb));
vlan_proto = be16_to_cpu(skb->vlan_proto);
+ if (vlan_proto == ETH_P_8021AD)
+ qpn_vlan.ins_vlan = MLX4_WQE_CTRL_INS_SVLAN;
+ else if (vlan_proto == ETH_P_8021Q)
+ qpn_vlan.ins_vlan = MLX4_WQE_CTRL_INS_CVLAN;
+ else
+ qpn_vlan.ins_vlan = 0;
+ bf_ok = false;
}
netdev_txq_bql_enqueue_prefetchw(ring->tx_queue);
@@ -771,6 +859,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
else {
tx_desc = (struct mlx4_en_tx_desc *) ring->bounce_buf;
bounce = true;
+ bf_ok = false;
}
/* Save skb in tx_info ring */
@@ -907,8 +996,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
AVG_PERF_COUNTER(priv->pstats.tx_pktsz_avg, skb->len);
if (tx_info->inl)
- build_inline_wqe(tx_desc, skb, shinfo, real_size, &vlan_tag,
- tx_ind, fragptr);
+ build_inline_wqe(tx_desc, skb, shinfo, fragptr);
if (skb->encapsulation) {
union {
@@ -946,60 +1034,15 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
real_size = (real_size / 16) & 0x3f;
- if (ring->bf_enabled && desc_size <= MAX_BF && !bounce &&
- !skb_vlan_tag_present(skb) && send_doorbell) {
- tx_desc->ctrl.bf_qpn = ring->doorbell_qpn |
- cpu_to_be32(real_size);
+ bf_ok &= desc_size <= MAX_BF && send_doorbell;
- op_own |= htonl((bf_index & 0xffff) << 8);
- /* Ensure new descriptor hits memory
- * before setting ownership of this descriptor to HW
- */
- dma_wmb();
- tx_desc->ctrl.owner_opcode = op_own;
-
- wmb();
-
- mlx4_bf_copy(ring->bf.reg + ring->bf.offset, &tx_desc->ctrl,
- desc_size);
-
- wmb();
-
- ring->bf.offset ^= ring->bf.buf_size;
- } else {
- tx_desc->ctrl.vlan_tag = cpu_to_be16(vlan_tag);
- if (vlan_proto == ETH_P_8021AD)
- tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_SVLAN;
- else if (vlan_proto == ETH_P_8021Q)
- tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_CVLAN;
- else
- tx_desc->ctrl.ins_vlan = 0;
+ if (bf_ok)
+ qpn_vlan.bf_qpn = ring->doorbell_qpn | cpu_to_be32(real_size);
+ else
+ qpn_vlan.fence_size = real_size;
- tx_desc->ctrl.fence_size = real_size;
-
- /* Ensure new descriptor hits memory
- * before setting ownership of this descriptor to HW
- */
- dma_wmb();
- tx_desc->ctrl.owner_opcode = op_own;
- if (send_doorbell) {
- wmb();
- /* Since there is no iowrite*_native() that writes the
- * value as is, without byteswapping - using the one
- * the doesn't do byteswapping in the relevant arch
- * endianness.
- */
-#if defined(__LITTLE_ENDIAN)
- iowrite32(
-#else
- iowrite32be(
-#endif
- ring->doorbell_qpn,
- ring->bf.uar->map + MLX4_SEND_DOORBELL);
- } else {
- ring->xmit_more++;
- }
- }
+ mlx4_en_tx_write_desc(ring, tx_desc, qpn_vlan, desc_size, bf_index,
+ op_own, bf_ok, send_doorbell);
if (unlikely(stop_queue)) {
/* If queue was emptied after the if (stop_queue) , and before
@@ -1028,9 +1071,114 @@ tx_drop_unmap:
PCI_DMA_TODEVICE);
}
+tx_drop_count:
+ ring->tx_dropped++;
tx_drop:
dev_kfree_skb_any(skb);
- ring->tx_dropped++;
return NETDEV_TX_OK;
}
+/* Post one page-backed frame on Tx ring @tx_ind without going through an skb.
+ *
+ * @frame: rx allocation whose page and DMA address are handed to the Tx ring;
+ *         frame->page is set to NULL once the ring holds the page.
+ * @dev: net device owning the ring.
+ * @length: number of bytes to transmit starting at frame->dma.
+ * @tx_ind: index into priv->tx_ring.
+ * @doorbell_pending: in/out count of descriptors posted since the last
+ *         doorbell; reset to 0 when this call signals the hardware,
+ *         incremented otherwise.
+ *
+ * Returns NETDEV_TX_OK once the descriptor is posted. Returns NETDEV_TX_BUSY,
+ * leaving @frame untouched, when the port is down or the ring is full; only
+ * the ring-full case increments ring->tx_dropped.
+ */
+netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_alloc *frame,
+ struct net_device *dev, unsigned int length,
+ int tx_ind, int *doorbell_pending)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ union mlx4_wqe_qpn_vlan qpn_vlan = {};
+ struct mlx4_en_tx_ring *ring;
+ struct mlx4_en_tx_desc *tx_desc;
+ struct mlx4_wqe_data_seg *data;
+ struct mlx4_en_tx_info *tx_info;
+ int index, bf_index;
+ bool send_doorbell;
+ int nr_txbb = 1;
+ bool stop_queue;
+ dma_addr_t dma;
+ int real_size;
+ __be32 op_own;
+ u32 ring_cons;
+ bool bf_ok;
+
+ /* The descriptor built below is control segment + one data segment and
+ * must fit in a single TXBB (nr_txbb is fixed at 1).
+ */
+ BUILD_BUG_ON_MSG(ALIGN(CTRL_SIZE + DS_SIZE, TXBB_SIZE) != TXBB_SIZE,
+ "mlx4_en_xmit_frame requires minimum size tx desc");
+
+ ring = priv->tx_ring[tx_ind];
+
+ if (!priv->port_up)
+ goto tx_drop;
+
+ if (mlx4_en_is_tx_ring_full(ring))
+ goto tx_drop_count;
+
+ /* fetch ring->cons far ahead before needing it to avoid stall */
+ ring_cons = READ_ONCE(ring->cons);
+
+ index = ring->prod & ring->size_mask;
+ tx_info = &ring->tx_info[index];
+
+ bf_ok = ring->bf_enabled;
+
+ /* Track current inflight packets for performance analysis */
+ AVG_PERF_COUNTER(priv->pstats.inflight_avg,
+ (u32)(ring->prod - ring_cons - 1));
+
+ bf_index = ring->prod;
+ tx_desc = ring->buf + index * TXBB_SIZE;
+ data = &tx_desc->data;
+
+ dma = frame->dma;
+
+ /* The Tx ring now holds the page; clear it in the caller's frame. */
+ tx_info->page = frame->page;
+ frame->page = NULL;
+ tx_info->map0_dma = dma;
+ tx_info->map0_byte_count = length;
+ tx_info->nr_txbb = nr_txbb;
+ tx_info->nr_bytes = max_t(unsigned int, length, ETH_ZLEN);
+ tx_info->data_offset = (void *)data - (void *)tx_desc;
+ tx_info->ts_requested = 0;
+ tx_info->nr_maps = 1;
+ tx_info->linear = 1;
+ tx_info->inl = 0;
+
+ dma_sync_single_for_device(priv->ddev, dma, length, PCI_DMA_TODEVICE);
+
+ /* addr and lkey are written before byte_count, separated by dma_wmb() */
+ data->addr = cpu_to_be64(dma);
+ data->lkey = ring->mr_key;
+ dma_wmb();
+ data->byte_count = cpu_to_be32(length);
+
+ /* tx completion can avoid cache line miss for common cases */
+ tx_desc->ctrl.srcrb_flags = priv->ctrl_flags;
+
+ op_own = cpu_to_be32(MLX4_OPCODE_SEND) |
+ ((ring->prod & ring->size) ?
+ cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0);
+
+ ring->packets++;
+ ring->bytes += tx_info->nr_bytes;
+ AVG_PERF_COUNTER(priv->pstats.tx_pktsz_avg, length);
+
+ ring->prod += nr_txbb;
+
+ /* Signal the hardware when the ring just became full or when more than
+ * MLX4_EN_DOORBELL_BUDGET descriptors are already pending.
+ */
+ stop_queue = mlx4_en_is_tx_ring_full(ring);
+ send_doorbell = stop_queue ||
+ *doorbell_pending > MLX4_EN_DOORBELL_BUDGET;
+ bf_ok &= send_doorbell;
+
+ /* Descriptor size in 16-byte units, limited to 6 bits */
+ real_size = ((CTRL_SIZE + nr_txbb * DS_SIZE) / 16) & 0x3f;
+
+ if (bf_ok)
+ qpn_vlan.bf_qpn = ring->doorbell_qpn | cpu_to_be32(real_size);
+ else
+ qpn_vlan.fence_size = real_size;
+
+ mlx4_en_tx_write_desc(ring, tx_desc, qpn_vlan, TXBB_SIZE, bf_index,
+ op_own, bf_ok, send_doorbell);
+ *doorbell_pending = send_doorbell ? 0 : *doorbell_pending + 1;
+
+ return NETDEV_TX_OK;
+
+tx_drop_count:
+ ring->tx_dropped++;
+tx_drop:
+ return NETDEV_TX_BUSY;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c
index f61397745..cf8f8a72a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/eq.c
@@ -1305,8 +1305,8 @@ int mlx4_init_eq_table(struct mlx4_dev *dev)
return 0;
err_out_unmap:
- while (i >= 0)
- mlx4_free_eq(dev, &priv->eq_table.eq[i--]);
+ while (i > 0)
+ mlx4_free_eq(dev, &priv->eq_table.eq[--i]);
#ifdef CONFIG_RFS_ACCEL
for (i = 1; i <= dev->caps.num_ports; i++) {
if (mlx4_priv(dev)->port[i].rmap) {
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index e97094598..d728704d0 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -721,6 +721,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
#define QUERY_DEV_CAP_RSVD_LKEY_OFFSET 0x98
#define QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET 0xa0
#define QUERY_DEV_CAP_ETH_BACKPL_OFFSET 0x9c
+#define QUERY_DEV_CAP_DIAG_RPRT_PER_PORT 0x9c
#define QUERY_DEV_CAP_FW_REASSIGN_MAC 0x9d
#define QUERY_DEV_CAP_VXLAN 0x9e
#define QUERY_DEV_CAP_MAD_DEMUX_OFFSET 0xb0
@@ -935,6 +936,9 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_ETH_BACKPL_AN_REP;
if (field32 & (1 << 7))
dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT;
+ MLX4_GET(field32, outbox, QUERY_DEV_CAP_DIAG_RPRT_PER_PORT);
+ if (field32 & (1 << 17))
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT;
MLX4_GET(field, outbox, QUERY_DEV_CAP_FW_REASSIGN_MAC);
if (field & 1<<6)
dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_REASSIGN_MAC_EN;
@@ -1128,6 +1132,7 @@ int mlx4_QUERY_PORT(struct mlx4_dev *dev, int port, struct mlx4_port_cap *port_c
port_cap->max_pkeys = 1 << (field & 0xf);
MLX4_GET(field, outbox, QUERY_PORT_MAX_VL_OFFSET);
port_cap->max_vl = field & 0xf;
+ port_cap->max_tc_eth = field >> 4;
MLX4_GET(field, outbox, QUERY_PORT_MAX_MACVLAN_OFFSET);
port_cap->log_max_macs = field & 0xf;
port_cap->log_max_vlans = field >> 4;
@@ -2456,6 +2461,42 @@ int mlx4_NOP(struct mlx4_dev *dev)
MLX4_CMD_NATIVE);
}
+/* Run the MLX4_CMD_DIAG_RPRT firmware command and pick counters out of the
+ * returned mailbox.
+ *
+ * @dev: device to query.
+ * @op_modifier: op modifier passed through to the command.
+ * @offset: byte offsets into the output mailbox, one per requested counter.
+ * @value: output array; value[i] receives the 32-bit field read at offset[i].
+ * @array_len: number of entries in @offset and @value.
+ * @port: passed as the command's input modifier.
+ *
+ * Returns 0 on success, the mailbox allocation or command error, or -EINVAL
+ * if any requested field does not lie entirely inside the mailbox. On
+ * -EINVAL, entries before the offending one have already been written.
+ */
+int mlx4_query_diag_counters(struct mlx4_dev *dev, u8 op_modifier,
+			     const u32 offset[],
+			     u32 value[], size_t array_len, u8 port)
+{
+	struct mlx4_cmd_mailbox *mailbox;
+	u32 *outbox;
+	size_t i;
+	int ret;
+
+	mailbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+
+	outbox = mailbox->buf;
+
+	ret = mlx4_cmd_box(dev, 0, mailbox->dma, port, op_modifier,
+			   MLX4_CMD_DIAG_RPRT, MLX4_CMD_TIME_CLASS_A,
+			   MLX4_CMD_NATIVE);
+	if (ret)
+		goto out;
+
+	for (i = 0; i < array_len; i++) {
+		/* The whole sizeof(value[i]) field must fit in the mailbox;
+		 * comparing against MLX4_MAILBOX_SIZE alone would let the
+		 * read run past the end of the buffer.
+		 */
+		if (offset[i] > MLX4_MAILBOX_SIZE - sizeof(value[i])) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+		MLX4_GET(value[i], outbox, offset[i]);
+	}
+
+out:
+	mlx4_free_cmd_mailbox(dev, mailbox);
+	return ret;
+}
+EXPORT_SYMBOL(mlx4_query_diag_counters);
+
int mlx4_get_phys_port_id(struct mlx4_dev *dev)
{
u8 port;
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h
index 7ea258af6..cdbd76f10 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.h
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.h
@@ -53,6 +53,7 @@ struct mlx4_port_cap {
int ib_mtu;
int max_port_width;
int max_vl;
+ int max_tc_eth;
int max_gids;
int max_pkeys;
u64 def_mac;
diff --git a/drivers/net/ethernet/mellanox/mlx4/intf.c b/drivers/net/ethernet/mellanox/mlx4/intf.c
index dec77d6f0..0e8b7c449 100644
--- a/drivers/net/ethernet/mellanox/mlx4/intf.c
+++ b/drivers/net/ethernet/mellanox/mlx4/intf.c
@@ -147,7 +147,7 @@ int mlx4_do_bond(struct mlx4_dev *dev, bool enable)
if (enable) {
dev->flags |= MLX4_FLAG_BONDED;
} else {
- ret = mlx4_virt2phy_port_map(dev, 1, 2);
+ ret = mlx4_virt2phy_port_map(dev, 1, 2);
if (ret) {
mlx4_err(dev, "Fail to reset port map\n");
return ret;
@@ -218,6 +218,9 @@ void mlx4_unregister_device(struct mlx4_dev *dev)
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_interface *intf;
+ if (!(dev->persist->interface_state & MLX4_INTERFACE_STATE_UP))
+ return;
+
mlx4_stop_catas_poll(dev);
mutex_lock(&intf_mutex);
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 546fab0ec..7183ac413 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -292,6 +292,7 @@ static int _mlx4_dev_port(struct mlx4_dev *dev, int port,
dev->caps.pkey_table_len[port] = port_cap->max_pkeys;
dev->caps.port_width_cap[port] = port_cap->max_port_width;
dev->caps.eth_mtu_cap[port] = port_cap->eth_mtu;
+ dev->caps.max_tc_eth = port_cap->max_tc_eth;
dev->caps.def_mac[port] = port_cap->def_mac;
dev->caps.supported_type[port] = port_cap->supported_port_types;
dev->caps.suggested_type[port] = port_cap->suggested_type;
@@ -2599,7 +2600,7 @@ static int mlx4_setup_hca(struct mlx4_dev *dev)
err = mlx4_init_uar_table(dev);
if (err) {
mlx4_err(dev, "Failed to initialize user access region table, aborting\n");
- return err;
+ return err;
}
err = mlx4_uar_alloc(dev, &priv->driver_uar);
@@ -2969,6 +2970,7 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port)
mlx4_err(dev, "Failed to create mtu file for port %d\n", port);
device_remove_file(&info->dev->persist->pdev->dev,
&info->port_attr);
+ devlink_port_unregister(&info->devlink_port);
info->port = -1;
}
@@ -2983,6 +2985,8 @@ static void mlx4_cleanup_port_info(struct mlx4_port_info *info)
device_remove_file(&info->dev->persist->pdev->dev, &info->port_attr);
device_remove_file(&info->dev->persist->pdev->dev,
&info->port_mtu_attr);
+ devlink_port_unregister(&info->devlink_port);
+
#ifdef CONFIG_RFS_ACCEL
free_irq_cpu_rmap(info->rmap);
info->rmap = NULL;
diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c
index f2d092001..94b891c11 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mcg.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c
@@ -618,8 +618,8 @@ static int remove_promisc_qp(struct mlx4_dev *dev, u8 port,
err = mlx4_READ_ENTRY(dev,
entry->index,
mailbox);
- if (err)
- goto out_mailbox;
+ if (err)
+ goto out_mailbox;
members_count =
be32_to_cpu(mgm->members_count) &
0xffffff;
@@ -657,8 +657,8 @@ static int remove_promisc_qp(struct mlx4_dev *dev, u8 port,
err = mlx4_WRITE_ENTRY(dev,
entry->index,
mailbox);
- if (err)
- goto out_mailbox;
+ if (err)
+ goto out_mailbox;
}
}
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 13d297ee3..9099dbd04 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -132,6 +132,7 @@ enum {
MLX4_EN_NUM_UP)
#define MLX4_EN_DEFAULT_TX_WORK 256
+#define MLX4_EN_DOORBELL_BUDGET 8
/* Target number of packets to coalesce with interrupt moderation */
#define MLX4_EN_RX_COAL_TARGET 44
@@ -164,6 +165,10 @@ enum {
#define MLX4_LOOPBACK_TEST_PAYLOAD (HEADER_COPY_SIZE - ETH_HLEN)
#define MLX4_EN_MIN_MTU 46
+/* Effective frame size for a given MTU: the MTU plus the Ethernet header and
+ * two VLAN headers. VLAN_HLEN is added twice to support an skb tagged with
+ * multiple VLAN headers (for example ETH_P_8021Q and ETH_P_8021AD).
+ */
+#define MLX4_EN_EFF_MTU(mtu) ((mtu) + ETH_HLEN + (2 * VLAN_HLEN))
#define ETH_BCAST 0xffffffffffffULL
#define MLX4_EN_LOOPBACK_RETRIES 5
@@ -215,7 +220,10 @@ enum cq_type {
struct mlx4_en_tx_info {
- struct sk_buff *skb;
+ union {
+ struct sk_buff *skb;
+ struct page *page;
+ };
dma_addr_t map0_dma;
u32 map0_byte_count;
u32 nr_txbb;
@@ -255,6 +263,14 @@ struct mlx4_en_rx_alloc {
u32 page_size;
};
+/* Capacity of the per-ring page cache: twice the NAPI poll weight. */
+#define MLX4_EN_CACHE_SIZE (2 * NAPI_POLL_WEIGHT)
+/* Fixed-size store of rx allocations, embedded in each rx ring as page_cache.
+ * NOTE(review): index presumably counts the entries of buf[] currently in
+ * use - confirm against the users in en_rx.c.
+ */
+struct mlx4_en_page_cache {
+ u32 index;
+ struct mlx4_en_rx_alloc buf[MLX4_EN_CACHE_SIZE];
+};
+
+struct mlx4_en_priv;
+
struct mlx4_en_tx_ring {
/* cache line used and dirtied in tx completion
* (mlx4_en_free_tx_buf())
@@ -288,6 +304,11 @@ struct mlx4_en_tx_ring {
__be32 mr_key;
void *buf;
struct mlx4_en_tx_info *tx_info;
+ struct mlx4_en_rx_ring *recycle_ring;
+ u32 (*free_tx_desc)(struct mlx4_en_priv *priv,
+ struct mlx4_en_tx_ring *ring,
+ int index, u8 owner,
+ u64 timestamp, int napi_mode);
u8 *bounce_buf;
struct mlx4_qp_context context;
int qpn;
@@ -319,6 +340,8 @@ struct mlx4_en_rx_ring {
u8 fcs_del;
void *buf;
void *rx_info;
+ struct bpf_prog *xdp_prog;
+ struct mlx4_en_page_cache page_cache;
unsigned long bytes;
unsigned long packets;
unsigned long csum_ok;
@@ -440,7 +463,9 @@ struct mlx4_en_mc_list {
struct mlx4_en_frag_info {
u16 frag_size;
u16 frag_prefix_size;
- u16 frag_stride;
+ u32 frag_stride;
+ enum dma_data_direction dma_dir;
+ int order;
};
#ifdef CONFIG_MLX4_EN_DCB
@@ -450,6 +475,17 @@ struct mlx4_en_frag_info {
#define MLX4_EN_TC_ETS 7
+/* Per-priority PFC setting stored in struct mlx4_en_cee_config.
+ * NOTE(review): presumably "full" means both directions and tx/rx a single
+ * direction - confirm against the users in en_dcb_nl.c.
+ */
+enum dcb_pfc_type {
+ pfc_disabled = 0,
+ pfc_enabled_full,
+ pfc_enabled_tx,
+ pfc_enabled_rx
+};
+
+/* CEE-mode DCB configuration kept in struct mlx4_en_priv (cee_config):
+ * one overall pfc_state flag plus one dcb_pfc_type entry per user priority
+ * (MLX4_EN_NUM_UP entries).
+ */
+struct mlx4_en_cee_config {
+ bool pfc_state;
+ enum dcb_pfc_type dcb_pfc[MLX4_EN_NUM_UP];
+};
#endif
struct ethtool_flow_id {
@@ -469,6 +505,9 @@ enum {
MLX4_EN_FLAG_RX_FILTER_NEEDED = (1 << 3),
MLX4_EN_FLAG_FORCE_PROMISC = (1 << 4),
MLX4_EN_FLAG_RX_CSUM_NON_TCP_UDP = (1 << 5),
+#ifdef CONFIG_MLX4_EN_DCB
+ MLX4_EN_FLAG_DCB_ENABLED = (1 << 6),
+#endif
};
#define PORT_BEACON_MAX_LIMIT (65535)
@@ -536,6 +575,7 @@ struct mlx4_en_priv {
struct mlx4_en_frag_info frag_info[MLX4_EN_MAX_RX_FRAGS];
u16 num_frags;
u16 log_rx_info;
+ int xdp_ring_num;
struct mlx4_en_tx_ring **tx_ring;
struct mlx4_en_rx_ring *rx_ring[MAX_RX_RINGS];
@@ -547,10 +587,8 @@ struct mlx4_en_priv {
struct work_struct linkstate_task;
struct delayed_work stats_task;
struct delayed_work service_task;
-#ifdef CONFIG_MLX4_EN_VXLAN
struct work_struct vxlan_add_task;
struct work_struct vxlan_del_task;
-#endif
struct mlx4_en_perf_stats pstats;
struct mlx4_en_pkt_stats pkstats;
struct mlx4_en_counter_stats pf_stats;
@@ -572,9 +610,12 @@ struct mlx4_en_priv {
u32 counter_index;
#ifdef CONFIG_MLX4_EN_DCB
+#define MLX4_EN_DCB_ENABLED 0x3
struct ieee_ets ets;
u16 maxrate[IEEE_8021QAZ_MAX_TCS];
enum dcbnl_cndd_states cndd_state[IEEE_8021QAZ_MAX_TCS];
+ struct mlx4_en_cee_config cee_config;
+ u8 dcbx_cap;
#endif
#ifdef CONFIG_RFS_ACCEL
spinlock_t filters_lock;
@@ -644,6 +685,12 @@ void mlx4_en_tx_irq(struct mlx4_cq *mcq);
u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb,
void *accel_priv, select_queue_fallback_t fallback);
netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev);
+netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_alloc *frame,
+ struct net_device *dev, unsigned int length,
+ int tx_ind, int *doorbell_pending);
+void mlx4_en_xmit_doorbell(struct mlx4_en_tx_ring *ring);
+bool mlx4_en_rx_recycle(struct mlx4_en_rx_ring *ring,
+ struct mlx4_en_rx_alloc *frame);
int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
struct mlx4_en_tx_ring **pring,
@@ -672,6 +719,14 @@ int mlx4_en_process_rx_cq(struct net_device *dev,
int budget);
int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget);
int mlx4_en_poll_tx_cq(struct napi_struct *napi, int budget);
+u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
+ struct mlx4_en_tx_ring *ring,
+ int index, u8 owner, u64 timestamp,
+ int napi_mode);
+u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv,
+ struct mlx4_en_tx_ring *ring,
+ int index, u8 owner, u64 timestamp,
+ int napi_mode);
void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride,
int is_tx, int rss, int qpn, int cqn, int user_prio,
struct mlx4_qp_context *context);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c
index 93195191f..395b5463c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mr.c
@@ -248,7 +248,7 @@ static void mlx4_free_mtt_range(struct mlx4_dev *dev, u32 offset, int order)
offset, order);
return;
}
- __mlx4_free_mtt_range(dev, offset, order);
+ __mlx4_free_mtt_range(dev, offset, order);
}
void mlx4_mtt_cleanup(struct mlx4_dev *dev, struct mlx4_mtt *mtt)
diff --git a/drivers/net/ethernet/mellanox/mlx4/pd.c b/drivers/net/ethernet/mellanox/mlx4/pd.c
index b3cc3ab63..6fc156a39 100644
--- a/drivers/net/ethernet/mellanox/mlx4/pd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/pd.c
@@ -205,7 +205,9 @@ int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf, int node)
goto free_uar;
}
- uar->bf_map = io_mapping_map_wc(priv->bf_mapping, uar->index << PAGE_SHIFT);
+ uar->bf_map = io_mapping_map_wc(priv->bf_mapping,
+ uar->index << PAGE_SHIFT,
+ PAGE_SIZE);
if (!uar->bf_map) {
err = -ENOMEM;
goto unamp_uar;
diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c
index 087b23b32..c5b206429 100644
--- a/drivers/net/ethernet/mellanox/mlx4/port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/port.c
@@ -52,6 +52,7 @@
#define MLX4_FLAG_V_IGNORE_FCS_MASK 0x2
#define MLX4_IGNORE_FCS_MASK 0x1
+#define MLX4_TC_MAX_NUMBER 8
void mlx4_init_mac_table(struct mlx4_dev *dev, struct mlx4_mac_table *table)
{
@@ -2015,3 +2016,14 @@ out:
return ret;
}
EXPORT_SYMBOL(mlx4_get_module_info);
+
+/* Number of Ethernet traffic classes for @dev: the value recorded in
+ * dev->caps.max_tc_eth, or MLX4_TC_MAX_NUMBER when that value is zero.
+ */
+int mlx4_max_tc(struct mlx4_dev *dev)
+{
+	u8 reported = dev->caps.max_tc_eth;
+
+	return reported ? reported : MLX4_TC_MAX_NUMBER;
+}
+EXPORT_SYMBOL(mlx4_max_tc);
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index cd9b2b28d..8b81114bd 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -2372,16 +2372,15 @@ static int mpt_free_res(struct mlx4_dev *dev, int slave, int op, int cmd,
__mlx4_mpt_release(dev, index);
break;
case RES_OP_MAP_ICM:
- index = get_param_l(&in_param);
- id = index & mpt_mask(dev);
- err = mr_res_start_move_to(dev, slave, id,
- RES_MPT_RESERVED, &mpt);
- if (err)
- return err;
-
- __mlx4_mpt_free_icm(dev, mpt->key);
- res_end_move(dev, slave, RES_MPT, id);
+ index = get_param_l(&in_param);
+ id = index & mpt_mask(dev);
+ err = mr_res_start_move_to(dev, slave, id,
+ RES_MPT_RESERVED, &mpt);
+ if (err)
return err;
+
+ __mlx4_mpt_free_icm(dev, mpt->key);
+ res_end_move(dev, slave, RES_MPT, id);
break;
default:
err = -EINVAL;
@@ -4253,9 +4252,8 @@ int mlx4_UPDATE_QP_wrapper(struct mlx4_dev *dev, int slave,
(1ULL << MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_MC_LB)) &&
!(dev->caps.flags2 &
MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB)) {
- mlx4_warn(dev,
- "Src check LB for slave %d isn't supported\n",
- slave);
+ mlx4_warn(dev, "Src check LB for slave %d isn't supported\n",
+ slave);
return -ENOTSUPP;
}