From d635711daa98be86d4c7fd01499c34f566b54ccb Mon Sep 17 00:00:00 2001 From: AndrĂ© Fabian Silva Delgado <emulatorman@parabola.nu> Date: Fri, 10 Jun 2016 05:30:17 -0300 Subject: Linux-libre 4.6.2-gnu --- drivers/net/ethernet/intel/Kconfig | 9 + drivers/net/ethernet/intel/e1000/e1000_main.c | 21 +- drivers/net/ethernet/intel/e1000e/defines.h | 5 + drivers/net/ethernet/intel/e1000e/hw.h | 4 + drivers/net/ethernet/intel/e1000e/ich8lan.c | 30 +- drivers/net/ethernet/intel/e1000e/ich8lan.h | 7 + drivers/net/ethernet/intel/e1000e/netdev.c | 4 + drivers/net/ethernet/intel/e1000e/ptp.c | 85 ++ drivers/net/ethernet/intel/e1000e/regs.h | 4 + drivers/net/ethernet/intel/fm10k/fm10k_main.c | 12 +- drivers/net/ethernet/intel/fm10k/fm10k_netdev.c | 11 +- drivers/net/ethernet/intel/fm10k/fm10k_pf.c | 30 +- drivers/net/ethernet/intel/i40e/Makefile | 1 + drivers/net/ethernet/intel/i40e/i40e.h | 67 +- drivers/net/ethernet/intel/i40e/i40e_adminq.c | 20 +- drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h | 87 +- drivers/net/ethernet/intel/i40e/i40e_client.c | 1012 ++++++++++++++++++++ drivers/net/ethernet/intel/i40e/i40e_client.h | 232 +++++ drivers/net/ethernet/intel/i40e/i40e_common.c | 776 +++++++++++++-- drivers/net/ethernet/intel/i40e/i40e_dcb.c | 27 +- drivers/net/ethernet/intel/i40e/i40e_debugfs.c | 328 +------ drivers/net/ethernet/intel/i40e/i40e_devids.h | 6 +- drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 296 ++++-- drivers/net/ethernet/intel/i40e/i40e_fcoe.c | 30 +- drivers/net/ethernet/intel/i40e/i40e_main.c | 538 +++++++---- drivers/net/ethernet/intel/i40e/i40e_nvm.c | 5 +- drivers/net/ethernet/intel/i40e/i40e_prototype.h | 39 +- drivers/net/ethernet/intel/i40e/i40e_register.h | 48 + drivers/net/ethernet/intel/i40e/i40e_txrx.c | 878 +++++++++-------- drivers/net/ethernet/intel/i40e/i40e_txrx.h | 89 +- drivers/net/ethernet/intel/i40e/i40e_type.h | 23 +- drivers/net/ethernet/intel/i40e/i40e_virtchnl.h | 34 + drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 283 +++++- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h | 8 +- drivers/net/ethernet/intel/i40evf/i40e_adminq.c | 5 +- .../net/ethernet/intel/i40evf/i40e_adminq_cmd.h | 87 +- drivers/net/ethernet/intel/i40evf/i40e_common.c | 125 +++ drivers/net/ethernet/intel/i40evf/i40e_prototype.h | 15 + drivers/net/ethernet/intel/i40evf/i40e_txrx.c | 826 +++++++++------- drivers/net/ethernet/intel/i40evf/i40e_txrx.h | 80 +- drivers/net/ethernet/intel/i40evf/i40evf.h | 7 +- drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c | 124 ++- drivers/net/ethernet/intel/i40evf/i40evf_main.c | 121 ++- .../net/ethernet/intel/i40evf/i40evf_virtchnl.c | 6 + drivers/net/ethernet/intel/igb/e1000_82575.c | 41 +- drivers/net/ethernet/intel/igb/e1000_82575.h | 4 +- drivers/net/ethernet/intel/igb/e1000_defines.h | 3 +- drivers/net/ethernet/intel/igb/e1000_hw.h | 4 +- drivers/net/ethernet/intel/igb/e1000_mac.c | 213 ++-- drivers/net/ethernet/intel/igb/e1000_mac.h | 5 +- drivers/net/ethernet/intel/igb/e1000_mbx.c | 18 +- drivers/net/ethernet/intel/igb/igb.h | 4 +- drivers/net/ethernet/intel/igb/igb_ethtool.c | 4 +- drivers/net/ethernet/intel/igb/igb_main.c | 959 +++++++++++-------- drivers/net/ethernet/intel/igb/igb_ptp.c | 3 +- drivers/net/ethernet/intel/igbvf/mbx.c | 20 +- drivers/net/ethernet/intel/igbvf/netdev.c | 143 +-- drivers/net/ethernet/intel/igbvf/vf.h | 1 + drivers/net/ethernet/intel/ixgbe/ixgbe.h | 17 +- drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 10 +- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 398 ++++++-- drivers/net/ethernet/intel/ixgbe/ixgbe_model.h | 103 ++ drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 2 +- drivers/net/ethernet/intel/ixgbevf/ethtool.c | 4 +- drivers/net/ethernet/intel/ixgbevf/ixgbevf.h | 2 + drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 18 +- drivers/net/ethernet/intel/ixgbevf/vf.c | 4 +- 67 files changed, 6185 insertions(+), 2240 deletions(-) create mode 100644 drivers/net/ethernet/intel/i40e/i40e_client.c create mode 100644 drivers/net/ethernet/intel/i40e/i40e_client.h create mode 100644 drivers/net/ethernet/intel/ixgbe/ixgbe_model.h (limited to 'drivers/net/ethernet/intel') diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig index fa593dd3e..3772f3ac9 100644 --- a/drivers/net/ethernet/intel/Kconfig +++ b/drivers/net/ethernet/intel/Kconfig @@ -83,6 +83,15 @@ config E1000E To compile this driver as a module, choose M here. The module will be called e1000e. +config E1000E_HWTS + bool "Support HW cross-timestamp on PCH devices" + default y + depends on E1000E && X86 + ---help--- + Say Y to enable hardware supported cross-timestamping on PCH + devices. The cross-timestamp is available through the PTP clock + driver precise cross-timestamp ioctl (PTP_SYS_OFFSET_PRECISE). + config IGB tristate "Intel(R) 82575/82576 PCI-Express Gigabit Ethernet support" depends on PCI diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 3fc7bde69..ae90d4f12 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -3106,7 +3106,7 @@ static int e1000_maybe_stop_tx(struct net_device *netdev, return __e1000_maybe_stop_tx(netdev, size); } -#define TXD_USE_COUNT(S, X) (((S) >> (X)) + 1) +#define TXD_USE_COUNT(S, X) (((S) + ((1 << (X)) - 1)) >> (X)) static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev) { @@ -3256,12 +3256,29 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb, nr_frags, mss); if (count) { + /* The descriptors needed is higher than other Intel drivers + * due to a number of workarounds. The breakdown is below: + * Data descriptors: MAX_SKB_FRAGS + 1 + * Context Descriptor: 1 + * Keep head from touching tail: 2 + * Workarounds: 3 + */ + int desc_needed = MAX_SKB_FRAGS + 7; + netdev_sent_queue(netdev, skb->len); skb_tx_timestamp(skb); e1000_tx_queue(adapter, tx_ring, tx_flags, count); + + /* 82544 potentially requires twice as many data descriptors + * in order to guarantee buffers don't end on evenly-aligned + * dwords + */ + if (adapter->pcix_82544) + desc_needed += MAX_SKB_FRAGS + 1; + /* Make sure there is space in the ring for the next send. */ - e1000_maybe_stop_tx(netdev, tx_ring, MAX_SKB_FRAGS + 2); + e1000_maybe_stop_tx(netdev, tx_ring, desc_needed); if (!skb->xmit_more || netif_xmit_stopped(netdev_get_tx_queue(netdev, 0))) { diff --git a/drivers/net/ethernet/intel/e1000e/defines.h b/drivers/net/ethernet/intel/e1000e/defines.h index f7c7804d7..0641c0098 100644 --- a/drivers/net/ethernet/intel/e1000e/defines.h +++ b/drivers/net/ethernet/intel/e1000e/defines.h @@ -528,6 +528,11 @@ #define E1000_RXCW_C 0x20000000 /* Receive config */ #define E1000_RXCW_SYNCH 0x40000000 /* Receive config synch */ +/* HH Time Sync */ +#define E1000_TSYNCTXCTL_MAX_ALLOWED_DLY_MASK 0x0000F000 /* max delay */ +#define E1000_TSYNCTXCTL_SYNC_COMP 0x40000000 /* sync complete */ +#define E1000_TSYNCTXCTL_START_SYNC 0x80000000 /* initiate sync */ + #define E1000_TSYNCTXCTL_VALID 0x00000001 /* Tx timestamp valid */ #define E1000_TSYNCTXCTL_ENABLED 0x00000010 /* enable Tx timestamping */ diff --git a/drivers/net/ethernet/intel/e1000e/hw.h b/drivers/net/ethernet/intel/e1000e/hw.h index b3949d5be..4e733bf1a 100644 --- a/drivers/net/ethernet/intel/e1000e/hw.h +++ b/drivers/net/ethernet/intel/e1000e/hw.h @@ -92,6 +92,10 @@ struct e1000_hw; #define E1000_DEV_ID_PCH_SPT_I219_LM2 0x15B7 /* SPT-H PCH */ #define E1000_DEV_ID_PCH_SPT_I219_V2 0x15B8 /* SPT-H PCH */ #define E1000_DEV_ID_PCH_LBG_I219_LM3 0x15B9 /* LBG PCH */ +#define E1000_DEV_ID_PCH_SPT_I219_LM4 0x15D7 +#define E1000_DEV_ID_PCH_SPT_I219_V4 0x15D8 +#define E1000_DEV_ID_PCH_SPT_I219_LM5 0x15E3 +#define E1000_DEV_ID_PCH_SPT_I219_V5 0x15D6 #define E1000_REVISION_4 4 diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index a049e3063..c0f4887ea 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -1252,9 +1252,9 @@ static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force) ew32(H2ME, mac_reg); } - /* Poll up to 100msec for ME to clear ULP_CFG_DONE */ + /* Poll up to 300msec for ME to clear ULP_CFG_DONE. */ while (er32(FWSM) & E1000_FWSM_ULP_CFG_DONE) { - if (i++ == 10) { + if (i++ == 30) { ret_val = -E1000_ERR_PHY; goto out; } @@ -1328,6 +1328,8 @@ static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force) I218_ULP_CONFIG1_RESET_TO_SMBUS | I218_ULP_CONFIG1_WOL_HOST | I218_ULP_CONFIG1_INBAND_EXIT | + I218_ULP_CONFIG1_EN_ULP_LANPHYPC | + I218_ULP_CONFIG1_DIS_CLR_STICKY_ON_PERST | I218_ULP_CONFIG1_DISABLE_SMB_PERST); e1000_write_phy_reg_hv_locked(hw, I218_ULP_CONFIG1, phy_reg); @@ -1433,6 +1435,18 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) emi_addr = I217_RX_CONFIG; ret_val = e1000_write_emi_reg_locked(hw, emi_addr, emi_val); + if (hw->mac.type == e1000_pch_lpt || + hw->mac.type == e1000_pch_spt) { + u16 phy_reg; + + e1e_rphy_locked(hw, I217_PLL_CLOCK_GATE_REG, &phy_reg); + phy_reg &= ~I217_PLL_CLOCK_GATE_MASK; + if (speed == SPEED_100 || speed == SPEED_10) + phy_reg |= 0x3E8; + else + phy_reg |= 0xFA; + e1e_wphy_locked(hw, I217_PLL_CLOCK_GATE_REG, phy_reg); + } hw->phy.ops.release(hw); if (ret_val) @@ -1467,6 +1481,18 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) hw->phy.ops.release(hw); if (ret_val) return ret_val; + } else { + ret_val = hw->phy.ops.acquire(hw); + if (ret_val) + return ret_val; + + ret_val = e1e_wphy_locked(hw, + PHY_REG(776, 20), + 0xC023); + hw->phy.ops.release(hw); + if (ret_val) + return ret_val; + } } } diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.h b/drivers/net/ethernet/intel/e1000e/ich8lan.h index 34c551e32..2311f6003 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.h +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.h @@ -188,6 +188,10 @@ #define I218_ULP_CONFIG1_INBAND_EXIT 0x0020 /* Inband on ULP exit */ #define I218_ULP_CONFIG1_WOL_HOST 0x0040 /* WoL Host on ULP exit */ #define I218_ULP_CONFIG1_RESET_TO_SMBUS 0x0100 /* Reset to SMBus mode */ +/* enable ULP even if when phy powered down via lanphypc */ +#define I218_ULP_CONFIG1_EN_ULP_LANPHYPC 0x0400 +/* disable clear of sticky ULP on PERST */ +#define I218_ULP_CONFIG1_DIS_CLR_STICKY_ON_PERST 0x0800 #define I218_ULP_CONFIG1_DISABLE_SMB_PERST 0x1000 /* Disable on PERST# */ /* SMBus Address Phy Register */ @@ -226,6 +230,9 @@ #define HV_PM_CTRL_PLL_STOP_IN_K1_GIGA 0x100 #define HV_PM_CTRL_K1_ENABLE 0x4000 +#define I217_PLL_CLOCK_GATE_REG PHY_REG(772, 28) +#define I217_PLL_CLOCK_GATE_MASK 0x07FF + #define SW_FLAG_TIMEOUT 1000 /* SW Semaphore flag timeout in ms */ /* Inband Control */ diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index c71ba1bfc..9b4ec13d9 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -7452,6 +7452,10 @@ static const struct pci_device_id e1000_pci_tbl[] = { { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_SPT_I219_LM2), board_pch_spt }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_SPT_I219_V2), board_pch_spt }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LBG_I219_LM3), board_pch_spt }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_SPT_I219_LM4), board_pch_spt }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_SPT_I219_V4), board_pch_spt }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_SPT_I219_LM5), board_pch_spt }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_SPT_I219_V5), board_pch_spt }, { 0, 0, 0, 0, 0, 0, 0 } /* terminate list */ }; diff --git a/drivers/net/ethernet/intel/e1000e/ptp.c b/drivers/net/ethernet/intel/e1000e/ptp.c index 25a0ad510..e2ff3ef75 100644 --- a/drivers/net/ethernet/intel/e1000e/ptp.c +++ b/drivers/net/ethernet/intel/e1000e/ptp.c @@ -26,6 +26,12 @@ #include "e1000.h" +#ifdef CONFIG_E1000E_HWTS +#include <linux/clocksource.h> +#include <linux/ktime.h> +#include <asm/tsc.h> +#endif + /** * e1000e_phc_adjfreq - adjust the frequency of the hardware clock * @ptp: ptp clock structure @@ -98,6 +104,78 @@ static int e1000e_phc_adjtime(struct ptp_clock_info *ptp, s64 delta) return 0; } +#ifdef CONFIG_E1000E_HWTS +#define MAX_HW_WAIT_COUNT (3) + +/** + * e1000e_phc_get_syncdevicetime - Callback given to timekeeping code reads system/device registers + * @device: current device time + * @system: system counter value read synchronously with device time + * @ctx: context provided by timekeeping code + * + * Read device and system (ART) clock simultaneously and return the corrected + * clock values in ns. + **/ +static int e1000e_phc_get_syncdevicetime(ktime_t *device, + struct system_counterval_t *system, + void *ctx) +{ + struct e1000_adapter *adapter = (struct e1000_adapter *)ctx; + struct e1000_hw *hw = &adapter->hw; + unsigned long flags; + int i; + u32 tsync_ctrl; + cycle_t dev_cycles; + cycle_t sys_cycles; + + tsync_ctrl = er32(TSYNCTXCTL); + tsync_ctrl |= E1000_TSYNCTXCTL_START_SYNC | + E1000_TSYNCTXCTL_MAX_ALLOWED_DLY_MASK; + ew32(TSYNCTXCTL, tsync_ctrl); + for (i = 0; i < MAX_HW_WAIT_COUNT; ++i) { + udelay(1); + tsync_ctrl = er32(TSYNCTXCTL); + if (tsync_ctrl & E1000_TSYNCTXCTL_SYNC_COMP) + break; + } + + if (i == MAX_HW_WAIT_COUNT) + return -ETIMEDOUT; + + dev_cycles = er32(SYSSTMPH); + dev_cycles <<= 32; + dev_cycles |= er32(SYSSTMPL); + spin_lock_irqsave(&adapter->systim_lock, flags); + *device = ns_to_ktime(timecounter_cyc2time(&adapter->tc, dev_cycles)); + spin_unlock_irqrestore(&adapter->systim_lock, flags); + + sys_cycles = er32(PLTSTMPH); + sys_cycles <<= 32; + sys_cycles |= er32(PLTSTMPL); + *system = convert_art_to_tsc(sys_cycles); + + return 0; +} + +/** + * e1000e_phc_getsynctime - Reads the current system/device cross timestamp + * @ptp: ptp clock structure + * @cts: structure containing timestamp + * + * Read device and system (ART) clock simultaneously and return the scaled + * clock values in ns. + **/ +static int e1000e_phc_getcrosststamp(struct ptp_clock_info *ptp, + struct system_device_crosststamp *xtstamp) +{ + struct e1000_adapter *adapter = container_of(ptp, struct e1000_adapter, + ptp_clock_info); + + return get_device_system_crosststamp(e1000e_phc_get_syncdevicetime, + adapter, NULL, xtstamp); +} +#endif/*CONFIG_E1000E_HWTS*/ + /** * e1000e_phc_gettime - Reads the current time from the hardware clock * @ptp: ptp clock structure @@ -236,6 +314,13 @@ void e1000e_ptp_init(struct e1000_adapter *adapter) break; } +#ifdef CONFIG_E1000E_HWTS + /* CPU must have ART and GBe must be from Sunrise Point or greater */ + if (hw->mac.type >= e1000_pch_spt && boot_cpu_has(X86_FEATURE_ART)) + adapter->ptp_clock_info.getcrosststamp = + e1000e_phc_getcrosststamp; +#endif/*CONFIG_E1000E_HWTS*/ + INIT_DELAYED_WORK(&adapter->systim_overflow_work, e1000e_systim_overflow_work); diff --git a/drivers/net/ethernet/intel/e1000e/regs.h b/drivers/net/ethernet/intel/e1000e/regs.h index 1d5e0b770..0cb4d365e 100644 --- a/drivers/net/ethernet/intel/e1000e/regs.h +++ b/drivers/net/ethernet/intel/e1000e/regs.h @@ -245,6 +245,10 @@ #define E1000_SYSTIML 0x0B600 /* System time register Low - RO */ #define E1000_SYSTIMH 0x0B604 /* System time register High - RO */ #define E1000_TIMINCA 0x0B608 /* Increment attributes register - RW */ +#define E1000_SYSSTMPL 0x0B648 /* HH Timesync system stamp low register */ +#define E1000_SYSSTMPH 0x0B64C /* HH Timesync system stamp hi register */ +#define E1000_PLTSTMPL 0x0B640 /* HH Timesync platform stamp low register */ +#define E1000_PLTSTMPH 0x0B644 /* HH Timesync platform stamp hi register */ #define E1000_RXMTRL 0x0B634 /* Time sync Rx EtherType and Msg Type - RW */ #define E1000_RXUDP 0x0B638 /* Time Sync Rx UDP Port - RW */ diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index b243c3cbe..4de17db38 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -243,7 +243,7 @@ static bool fm10k_can_reuse_rx_page(struct fm10k_rx_buffer *rx_buffer, /* Even if we own the page, we are not allowed to use atomic_set() * This would break get_page_unless_zero() users. */ - atomic_inc(&page->_count); + page_ref_inc(page); return true; } @@ -1937,8 +1937,10 @@ static void fm10k_init_reta(struct fm10k_intfc *interface) u16 i, rss_i = interface->ring_feature[RING_F_RSS].indices; u32 reta, base; - /* If the netdev is initialized we have to maintain table if possible */ - if (interface->netdev->reg_state != NETREG_UNINITIALIZED) { + /* If the Rx flow indirection table has been configured manually, we + * need to maintain it when possible. + */ + if (netif_is_rxfh_configured(interface->netdev)) { for (i = FM10K_RETA_SIZE; i--;) { reta = interface->reta[i]; if ((((reta << 24) >> 24) < rss_i) && @@ -1946,6 +1948,10 @@ static void fm10k_init_reta(struct fm10k_intfc *interface) (((reta << 8) >> 24) < rss_i) && (((reta) >> 24) < rss_i)) continue; + + /* this should never happen */ + dev_err(&interface->pdev->dev, + "RSS indirection table assigned flows out of queue bounds. Reconfiguring.\n"); goto repopulate_reta; } diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c index 662569d5b..d09a8dd71 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c @@ -1204,6 +1204,15 @@ err_queueing_scheme: return err; } +static int __fm10k_setup_tc(struct net_device *dev, u32 handle, __be16 proto, + struct tc_to_netdev *tc) +{ + if (tc->type != TC_SETUP_MQPRIO) + return -EINVAL; + + return fm10k_setup_tc(dev, tc->tc); +} + static int fm10k_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) { switch (cmd) { @@ -1386,7 +1395,7 @@ static const struct net_device_ops fm10k_netdev_ops = { .ndo_vlan_rx_kill_vid = fm10k_vlan_rx_kill_vid, .ndo_set_rx_mode = fm10k_set_rx_mode, .ndo_get_stats64 = fm10k_get_stats64, - .ndo_setup_tc = fm10k_setup_tc, + .ndo_setup_tc = __fm10k_setup_tc, .ndo_set_vf_mac = fm10k_ndo_set_vf_mac, .ndo_set_vf_vlan = fm10k_ndo_set_vf_vlan, .ndo_set_vf_rate = fm10k_ndo_set_vf_bw, diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c index 62ccebc5f..8cf943db5 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c @@ -1223,18 +1223,32 @@ s32 fm10k_iov_msg_mac_vlan_pf(struct fm10k_hw *hw, u32 **results, if (err) return err; - /* verify upper 16 bits are zero */ - if (vid >> 16) - return FM10K_ERR_PARAM; - set = !(vid & FM10K_VLAN_CLEAR); vid &= ~FM10K_VLAN_CLEAR; - err = fm10k_iov_select_vid(vf_info, (u16)vid); - if (err < 0) - return err; + /* if the length field has been set, this is a multi-bit + * update request. For multi-bit requests, simply disallow + * them when the pf_vid has been set. In this case, the PF + * should have already cleared the VLAN_TABLE, and if we + * allowed them, it could allow a rogue VF to receive traffic + * on a VLAN it was not assigned. In the single-bit case, we + * need to modify requests for VLAN 0 to use the default PF or + * SW vid when assigned. + */ - vid = err; + if (vid >> 16) { + /* prevent multi-bit requests when PF has + * administratively set the VLAN for this VF + */ + if (vf_info->pf_vid) + return FM10K_ERR_PARAM; + } else { + err = fm10k_iov_select_vid(vf_info, (u16)vid); + if (err < 0) + return err; + + vid = err; + } /* update VSI info for VF in regards to VLAN table */ err = hw->mac.ops.update_vlan(hw, vid, vf_info->vsi, set); diff --git a/drivers/net/ethernet/intel/i40e/Makefile b/drivers/net/ethernet/intel/i40e/Makefile index b4729ba57..3b3c63e54 100644 --- a/drivers/net/ethernet/intel/i40e/Makefile +++ b/drivers/net/ethernet/intel/i40e/Makefile @@ -41,6 +41,7 @@ i40e-objs := i40e_main.o \ i40e_diag.o \ i40e_txrx.o \ i40e_ptp.o \ + i40e_client.o \ i40e_virtchnl_pf.o i40e-$(CONFIG_I40E_DCB) += i40e_dcb.o i40e_dcb_nl.o diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 68f2204ec..1ce6e9c04 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2015 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -58,15 +58,13 @@ #ifdef I40E_FCOE #include "i40e_fcoe.h" #endif +#include "i40e_client.h" #include "i40e_virtchnl.h" #include "i40e_virtchnl_pf.h" #include "i40e_txrx.h" #include "i40e_dcb.h" /* Useful i40e defaults */ -#define I40E_BASE_PF_SEID 16 -#define I40E_BASE_VSI_SEID 512 -#define I40E_BASE_VEB_SEID 288 #define I40E_MAX_VEB 16 #define I40E_MAX_NUM_DESCRIPTORS 4096 @@ -104,6 +102,7 @@ #define I40E_PRIV_FLAGS_FD_ATR BIT(2) #define I40E_PRIV_FLAGS_VEB_STATS BIT(3) #define I40E_PRIV_FLAGS_PS BIT(4) +#define I40E_PRIV_FLAGS_HW_ATR_EVICT BIT(5) #define I40E_NVM_VERSION_LO_SHIFT 0 #define I40E_NVM_VERSION_LO_MASK (0xff << I40E_NVM_VERSION_LO_SHIFT) @@ -113,6 +112,7 @@ #define I40E_OEM_VER_PATCH_MASK 0xff #define I40E_OEM_VER_BUILD_SHIFT 8 #define I40E_OEM_VER_SHIFT 24 +#define I40E_PHY_DEBUG_PORT BIT(4) /* The values in here are decimal coded as hex as is the case in the NVM map*/ #define I40E_CURRENT_NVM_VERSION_HI 0x2 @@ -137,6 +137,19 @@ /* default to trying for four seconds */ #define I40E_TRY_LINK_TIMEOUT (4 * HZ) +/** + * i40e_is_mac_710 - Return true if MAC is X710/XL710 + * @hw: ptr to the hardware info + **/ +static inline bool i40e_is_mac_710(struct i40e_hw *hw) +{ + if ((hw->mac.type == I40E_MAC_X710) || + (hw->mac.type == I40E_MAC_XL710)) + return true; + + return false; +} + /* driver state flags */ enum i40e_state_t { __I40E_TESTING, @@ -178,6 +191,7 @@ struct i40e_lump_tracking { u16 search_hint; u16 list[0]; #define I40E_PILE_VALID_BIT 0x8000 +#define I40E_IWARP_IRQ_PILE_ID (I40E_PILE_VALID_BIT - 2) }; #define I40E_DEFAULT_ATR_SAMPLE_RATE 20 @@ -270,6 +284,8 @@ struct i40e_pf { #endif /* I40E_FCOE */ u16 num_lan_qps; /* num lan queues this PF has set up */ u16 num_lan_msix; /* num queue vectors for the base PF vsi */ + u16 num_iwarp_msix; /* num of iwarp vectors for this PF */ + int iwarp_base_vector; int queues_left; /* queues left unclaimed */ u16 alloc_rss_size; /* allocated RSS queues */ u16 rss_size_max; /* HW defined max RSS queues */ @@ -317,6 +333,7 @@ struct i40e_pf { #define I40E_FLAG_16BYTE_RX_DESC_ENABLED BIT_ULL(13) #define I40E_FLAG_CLEAN_ADMINQ BIT_ULL(14) #define I40E_FLAG_FILTER_SYNC BIT_ULL(15) +#define I40E_FLAG_SERVICE_CLIENT_REQUESTED BIT_ULL(16) #define I40E_FLAG_PROCESS_MDD_EVENT BIT_ULL(17) #define I40E_FLAG_PROCESS_VFLR_EVENT BIT_ULL(18) #define I40E_FLAG_SRIOV_ENABLED BIT_ULL(19) @@ -339,6 +356,12 @@ struct i40e_pf { #define I40E_FLAG_VEB_MODE_ENABLED BIT_ULL(40) #define I40E_FLAG_GENEVE_OFFLOAD_CAPABLE BIT_ULL(41) #define I40E_FLAG_NO_PCI_LINK_CHECK BIT_ULL(42) +#define I40E_FLAG_100M_SGMII_CAPABLE BIT_ULL(43) +#define I40E_FLAG_RESTART_AUTONEG BIT_ULL(44) +#define I40E_FLAG_NO_DCB_SUPPORT BIT_ULL(45) +#define I40E_FLAG_USE_SET_LLDP_MIB BIT_ULL(46) +#define I40E_FLAG_STOP_FW_LLDP BIT_ULL(47) +#define I40E_FLAG_HAVE_10GBASET_PHY BIT_ULL(48) #define I40E_FLAG_PF_MAC BIT_ULL(50) /* tracks features that get auto disabled by errors */ @@ -391,6 +414,7 @@ struct i40e_pf { struct i40e_vf *vf; int num_alloc_vfs; /* actual number of VFs allocated */ u32 vf_aq_requests; + u32 arq_overflows; /* Not fatal, possibly indicative of problems */ /* DCBx/DCBNL capability for PF that indicates * whether DCBx is managed by firmware or host @@ -423,6 +447,7 @@ struct i40e_pf { u32 ioremap_len; u32 fd_inv; + u16 phy_led_val; }; struct i40e_mac_filter { @@ -492,6 +517,7 @@ struct i40e_vsi { u32 tx_busy; u64 tx_linearize; u64 tx_force_wb; + u64 tx_lost_interrupt; u32 rx_buf_failed; u32 rx_page_failed; @@ -500,13 +526,6 @@ struct i40e_vsi { struct i40e_ring **tx_rings; u16 work_limit; - /* high bit set means dynamic, use accessor routines to read/write. - * hardware only supports 2us resolution for the ITR registers. - * these values always store the USER setting, and must be converted - * before programming to a register. - */ - u16 rx_itr_setting; - u16 tx_itr_setting; u16 int_rate_limit; /* value in usecs */ u16 rss_table_size; /* HW RSS table size */ @@ -557,6 +576,8 @@ struct i40e_vsi { struct kobject *kobj; /* sysfs object */ bool current_isup; /* Sync 'link up' logging */ + void *priv; /* client driver data reference. */ + /* VSI specific handlers */ irqreturn_t (*irq_handler)(int irq, void *data); @@ -714,6 +735,10 @@ void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi, struct i40e_vsi_context *ctxt, u8 enabled_tc, bool is_add); #endif +void i40e_service_event_schedule(struct i40e_pf *pf); +void i40e_notify_client_of_vf_msg(struct i40e_vsi *vsi, u32 vf_id, + u8 *msg, u16 len); + int i40e_vsi_control_rings(struct i40e_vsi *vsi, bool enable); int i40e_reconfig_rss_queues(struct i40e_pf *pf, int queue_count); struct i40e_veb *i40e_veb_setup(struct i40e_pf *pf, u16 flags, u16 uplink_seid, @@ -736,6 +761,17 @@ static inline void i40e_dbg_pf_exit(struct i40e_pf *pf) {} static inline void i40e_dbg_init(void) {} static inline void i40e_dbg_exit(void) {} #endif /* CONFIG_DEBUG_FS*/ +/* needed by client drivers */ +int i40e_lan_add_device(struct i40e_pf *pf); +int i40e_lan_del_device(struct i40e_pf *pf); +void i40e_client_subtask(struct i40e_pf *pf); +void i40e_notify_client_of_l2_param_changes(struct i40e_vsi *vsi); +void i40e_notify_client_of_netdev_open(struct i40e_vsi *vsi); +void i40e_notify_client_of_netdev_close(struct i40e_vsi *vsi, bool reset); +void i40e_notify_client_of_vf_enable(struct i40e_pf *pf, u32 num_vfs); +void i40e_notify_client_of_vf_reset(struct i40e_pf *pf, u32 vf_id); +int i40e_vf_client_capable(struct i40e_pf *pf, u32 vf_id, + enum i40e_client_type type); /** * i40e_irq_dynamic_enable - Enable default interrupt generation settings * @vsi: pointer to a vsi @@ -747,6 +783,9 @@ static inline void i40e_irq_dynamic_enable(struct i40e_vsi *vsi, int vector) struct i40e_hw *hw = &pf->hw; u32 val; + /* definitely clear the PBA here, as this function is meant to + * clean out all previous interrupts AND enable the interrupt + */ val = I40E_PFINT_DYN_CTLN_INTENA_MASK | I40E_PFINT_DYN_CTLN_CLEARPBA_MASK | (I40E_ITR_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT); @@ -754,9 +793,8 @@ static inline void i40e_irq_dynamic_enable(struct i40e_vsi *vsi, int vector) /* skip the flush */ } -void i40e_irq_dynamic_disable(struct i40e_vsi *vsi, int vector); void i40e_irq_dynamic_disable_icr0(struct i40e_pf *pf); -void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf); +void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf, bool clearpba); #ifdef I40E_FCOE struct rtnl_link_stats64 *i40e_get_netdev_stats_struct( struct net_device *netdev, @@ -786,7 +824,8 @@ struct i40e_mac_filter *i40e_find_mac(struct i40e_vsi *vsi, u8 *macaddr, bool is_vf, bool is_netdev); #ifdef I40E_FCOE int i40e_close(struct net_device *netdev); -int i40e_setup_tc(struct net_device *netdev, u8 tc); +int __i40e_setup_tc(struct net_device *netdev, u32 handle, __be16 proto, + struct tc_to_netdev *tc); void i40e_netpoll(struct net_device *netdev); int i40e_fcoe_enable(struct net_device *netdev); int i40e_fcoe_disable(struct net_device *netdev); diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c index 1fd5ea82a..df8e2fd6a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2014 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -953,6 +953,9 @@ i40e_status i40e_clean_arq_element(struct i40e_hw *hw, u16 flags; u16 ntu; + /* pre-clean the event info */ + memset(&e->desc, 0, sizeof(e->desc)); + /* take the lock before we start messing with the ring */ mutex_lock(&hw->aq.arq_mutex); @@ -1020,14 +1023,6 @@ i40e_status i40e_clean_arq_element(struct i40e_hw *hw, hw->aq.arq.next_to_clean = ntc; hw->aq.arq.next_to_use = ntu; -clean_arq_element_out: - /* Set pending if needed, unlock and return */ - if (pending != NULL) - *pending = (ntc > ntu ? hw->aq.arq.count : 0) + (ntu - ntc); - -clean_arq_element_err: - mutex_unlock(&hw->aq.arq_mutex); - if (i40e_is_nvm_update_op(&e->desc)) { if (hw->aq.nvm_release_on_done) { i40e_release_nvm(hw); @@ -1048,6 +1043,13 @@ clean_arq_element_err: } } +clean_arq_element_out: + /* Set pending if needed, unlock and return */ + if (pending) + *pending = (ntc > ntu ? hw->aq.arq.count : 0) + (ntu - ntc); +clean_arq_element_err: + mutex_unlock(&hw->aq.arq_mutex); + return ret_code; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h index b22012a44..8d5c65ab6 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2014 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -34,7 +34,7 @@ */ #define I40E_FW_API_VERSION_MAJOR 0x0001 -#define I40E_FW_API_VERSION_MINOR 0x0004 +#define I40E_FW_API_VERSION_MINOR 0x0005 struct i40e_aq_desc { __le16 flags; @@ -145,6 +145,9 @@ enum i40e_admin_queue_opc { i40e_aqc_opc_remove_statistics = 0x0202, i40e_aqc_opc_set_port_parameters = 0x0203, i40e_aqc_opc_get_switch_resource_alloc = 0x0204, + i40e_aqc_opc_set_switch_config = 0x0205, + i40e_aqc_opc_rx_ctl_reg_read = 0x0206, + i40e_aqc_opc_rx_ctl_reg_write = 0x0207, i40e_aqc_opc_add_vsi = 0x0210, i40e_aqc_opc_update_vsi_parameters = 0x0211, @@ -220,6 +223,7 @@ enum i40e_admin_queue_opc { i40e_aqc_opc_get_phy_wol_caps = 0x0621, i40e_aqc_opc_set_phy_debug = 0x0622, i40e_aqc_opc_upload_ext_phy_fm = 0x0625, + i40e_aqc_opc_run_phy_activity = 0x0626, /* NVM commands */ i40e_aqc_opc_nvm_read = 0x0701, @@ -228,6 +232,7 @@ enum i40e_admin_queue_opc { i40e_aqc_opc_nvm_config_read = 0x0704, i40e_aqc_opc_nvm_config_write = 0x0705, i40e_aqc_opc_oem_post_update = 0x0720, + i40e_aqc_opc_thermal_sensor = 0x0721, /* virtualization commands */ i40e_aqc_opc_send_msg_to_pf = 0x0801, @@ -402,6 +407,7 @@ struct i40e_aqc_list_capabilities_element_resp { #define I40E_AQ_CAP_ID_OS2BMC_CAP 0x0004 #define I40E_AQ_CAP_ID_FUNCTIONS_VALID 0x0005 #define I40E_AQ_CAP_ID_ALTERNATE_RAM 0x0006 +#define I40E_AQ_CAP_ID_WOL_AND_PROXY 0x0008 #define I40E_AQ_CAP_ID_SRIOV 0x0012 #define I40E_AQ_CAP_ID_VF 0x0013 #define I40E_AQ_CAP_ID_VMDQ 0x0014 @@ -422,6 +428,7 @@ struct i40e_aqc_list_capabilities_element_resp { #define I40E_AQ_CAP_ID_LED 0x0061 #define I40E_AQ_CAP_ID_SDP 0x0062 #define I40E_AQ_CAP_ID_MDIO 0x0063 +#define I40E_AQ_CAP_ID_WSR_PROT 0x0064 #define I40E_AQ_CAP_ID_FLEX10 0x00F1 #define I40E_AQ_CAP_ID_CEM 0x00F2 @@ -680,6 +687,31 @@ struct i40e_aqc_switch_resource_alloc_element_resp { I40E_CHECK_STRUCT_LEN(0x10, i40e_aqc_switch_resource_alloc_element_resp); +/* Set Switch Configuration (direct 0x0205) */ +struct i40e_aqc_set_switch_config { + __le16 flags; +#define I40E_AQ_SET_SWITCH_CFG_PROMISC 0x0001 +#define I40E_AQ_SET_SWITCH_CFG_L2_FILTER 0x0002 + __le16 valid_flags; + u8 reserved[12]; +}; + +I40E_CHECK_CMD_LENGTH(i40e_aqc_set_switch_config); + +/* Read Receive control registers (direct 0x0206) + * Write Receive control registers (direct 0x0207) + * used for accessing Rx control registers that can be + * slow and need special handling when under high Rx load + */ +struct i40e_aqc_rx_ctl_reg_read_write { + __le32 reserved1; + __le32 address; + __le32 reserved2; + __le32 value; +}; + +I40E_CHECK_CMD_LENGTH(i40e_aqc_rx_ctl_reg_read_write); + /* Add VSI (indirect 0x0210) * this indirect command uses struct i40e_aqc_vsi_properties_data * as the indirect buffer (128 bytes) @@ -906,7 +938,8 @@ struct i40e_aqc_add_veb { I40E_AQC_ADD_VEB_PORT_TYPE_SHIFT) #define I40E_AQC_ADD_VEB_PORT_TYPE_DEFAULT 0x2 #define I40E_AQC_ADD_VEB_PORT_TYPE_DATA 0x4 -#define I40E_AQC_ADD_VEB_ENABLE_L2_FILTER 0x8 +#define I40E_AQC_ADD_VEB_ENABLE_L2_FILTER 0x8 /* deprecated */ +#define I40E_AQC_ADD_VEB_ENABLE_DISABLE_STATS 0x10 u8 enable_tcs; u8 reserved[9]; }; @@ -973,6 +1006,7 @@ struct i40e_aqc_add_macvlan_element_data { #define I40E_AQC_MACVLAN_ADD_HASH_MATCH 0x0002 #define I40E_AQC_MACVLAN_ADD_IGNORE_VLAN 0x0004 #define I40E_AQC_MACVLAN_ADD_TO_QUEUE 0x0008 +#define I40E_AQC_MACVLAN_ADD_USE_SHARED_MAC 0x0010 __le16 queue_number; #define I40E_AQC_MACVLAN_CMD_QUEUE_SHIFT 0 #define I40E_AQC_MACVLAN_CMD_QUEUE_MASK (0x7FF << \ @@ -1069,6 +1103,7 @@ struct i40e_aqc_set_vsi_promiscuous_modes { #define I40E_AQC_SET_VSI_PROMISC_BROADCAST 0x04 #define I40E_AQC_SET_VSI_DEFAULT 0x08 #define I40E_AQC_SET_VSI_PROMISC_VLAN 0x10 +#define I40E_AQC_SET_VSI_PROMISC_TX 0x8000 __le16 seid; #define I40E_AQC_VSI_PROM_CMD_SEID_MASK 0x3FF __le16 vlan_tag; @@ -1257,10 +1292,16 @@ struct i40e_aqc_add_remove_cloud_filters_element_data { #define I40E_AQC_ADD_CLOUD_TNL_TYPE_SHIFT 9 #define I40E_AQC_ADD_CLOUD_TNL_TYPE_MASK 0x1E00 -#define I40E_AQC_ADD_CLOUD_TNL_TYPE_XVLAN 0 +#define I40E_AQC_ADD_CLOUD_TNL_TYPE_VXLAN 0 #define I40E_AQC_ADD_CLOUD_TNL_TYPE_NVGRE_OMAC 1 -#define I40E_AQC_ADD_CLOUD_TNL_TYPE_NGE 2 +#define I40E_AQC_ADD_CLOUD_TNL_TYPE_GENEVE 2 #define I40E_AQC_ADD_CLOUD_TNL_TYPE_IP 3 +#define I40E_AQC_ADD_CLOUD_TNL_TYPE_RESERVED 4 +#define I40E_AQC_ADD_CLOUD_TNL_TYPE_VXLAN_GPE 5 + +#define I40E_AQC_ADD_CLOUD_FLAGS_SHARED_OUTER_MAC 0x2000 +#define I40E_AQC_ADD_CLOUD_FLAGS_SHARED_INNER_MAC 0x4000 +#define I40E_AQC_ADD_CLOUD_FLAGS_SHARED_OUTER_IP 0x8000 __le32 tenant_id; u8 reserved[4]; @@ -1755,7 +1796,12 @@ struct i40e_aqc_get_link_status { u8 config; #define I40E_AQ_CONFIG_CRC_ENA 0x04 #define I40E_AQ_CONFIG_PACING_MASK 0x78 - u8 reserved[5]; + u8 external_power_ability; +#define I40E_AQ_LINK_POWER_CLASS_1 0x00 +#define I40E_AQ_LINK_POWER_CLASS_2 0x01 +#define I40E_AQ_LINK_POWER_CLASS_3 0x02 +#define I40E_AQ_LINK_POWER_CLASS_4 0x03 + u8 reserved[4]; }; I40E_CHECK_CMD_LENGTH(i40e_aqc_get_link_status); @@ -1823,6 +1869,18 @@ enum i40e_aq_phy_reg_type { I40E_AQC_PHY_REG_EXERNAL_MODULE = 0x3 }; +/* Run PHY Activity (0x0626) */ +struct i40e_aqc_run_phy_activity { + __le16 activity_id; + u8 flags; + u8 reserved1; + __le32 control; + __le32 data; + u8 reserved2[4]; +}; + +I40E_CHECK_CMD_LENGTH(i40e_aqc_run_phy_activity); + /* NVM Read command (indirect 0x0701) * NVM Erase commands (direct 0x0702) * NVM Update commands (indirect 0x0703) @@ -1912,6 +1970,22 @@ struct i40e_aqc_nvm_oem_post_update_buffer { I40E_CHECK_STRUCT_LEN(0x28, i40e_aqc_nvm_oem_post_update_buffer); +/* Thermal Sensor (indirect 0x0721) + * read or set thermal sensor configs and values + * takes a sensor and command specific data buffer, not detailed here + */ +struct i40e_aqc_thermal_sensor { + u8 sensor_action; +#define I40E_AQ_THERMAL_SENSOR_READ_CONFIG 0 +#define I40E_AQ_THERMAL_SENSOR_SET_CONFIG 1 +#define I40E_AQ_THERMAL_SENSOR_READ_TEMP 2 + u8 reserved[7]; + __le32 addr_high; + __le32 addr_low; +}; + +I40E_CHECK_CMD_LENGTH(i40e_aqc_thermal_sensor); + /* Send to PF command (indirect 0x0801) id is only used by PF * Send to VF command (indirect 0x0802) id is only used by PF * Send to Peer PF command (indirect 0x0803) @@ -2191,6 +2265,7 @@ struct i40e_aqc_add_udp_tunnel { #define I40E_AQC_TUNNEL_TYPE_VXLAN 0x00 #define I40E_AQC_TUNNEL_TYPE_NGE 0x01 #define I40E_AQC_TUNNEL_TYPE_TEREDO 0x10 +#define I40E_AQC_TUNNEL_TYPE_VXLAN_GPE 0x11 u8 reserved1[10]; }; diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c new file mode 100644 index 000000000..0e6ac8413 --- /dev/null +++ b/drivers/net/ethernet/intel/i40e/i40e_client.c @@ -0,0 +1,1012 @@ +/******************************************************************************* + * + * Intel Ethernet Controller XL710 Family Linux Driver + * Copyright(c) 2013 - 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program. If not, see <http://www.gnu.org/licenses/>. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Contact Information: + * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> + * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + * + ******************************************************************************/ + +#include <linux/list.h> +#include <linux/errno.h> + +#include "i40e.h" +#include "i40e_prototype.h" +#include "i40e_client.h" + +static const char i40e_client_interface_version_str[] = I40E_CLIENT_VERSION_STR; + +static LIST_HEAD(i40e_devices); +static DEFINE_MUTEX(i40e_device_mutex); + +static LIST_HEAD(i40e_clients); +static DEFINE_MUTEX(i40e_client_mutex); + +static LIST_HEAD(i40e_client_instances); +static DEFINE_MUTEX(i40e_client_instance_mutex); + +static int i40e_client_virtchnl_send(struct i40e_info *ldev, + struct i40e_client *client, + u32 vf_id, u8 *msg, u16 len); + +static int i40e_client_setup_qvlist(struct i40e_info *ldev, + struct i40e_client *client, + struct i40e_qvlist_info *qvlist_info); + +static void i40e_client_request_reset(struct i40e_info *ldev, + struct i40e_client *client, + u32 reset_level); + +static int i40e_client_update_vsi_ctxt(struct i40e_info *ldev, + struct i40e_client *client, + bool is_vf, u32 vf_id, + u32 flag, u32 valid_flag); + +static struct i40e_ops i40e_lan_ops = { + .virtchnl_send = i40e_client_virtchnl_send, + .setup_qvlist = i40e_client_setup_qvlist, + .request_reset = i40e_client_request_reset, + .update_vsi_ctxt = i40e_client_update_vsi_ctxt, +}; + +/** + * i40e_client_type_to_vsi_type - convert client type to vsi type + * @client_type: the i40e_client type + * + * returns the related vsi type value + **/ +static +enum i40e_vsi_type i40e_client_type_to_vsi_type(enum i40e_client_type type) +{ + switch (type) { + case I40E_CLIENT_IWARP: + return I40E_VSI_IWARP; + + case I40E_CLIENT_VMDQ2: + return I40E_VSI_VMDQ2; + + default: + pr_err("i40e: Client type unknown\n"); + return I40E_VSI_TYPE_UNKNOWN; + } +} + +/** + * i40e_client_get_params - Get the params that can change at runtime + * @vsi: the VSI with the message + * @param: clinet param struct + * + **/ +static +int i40e_client_get_params(struct i40e_vsi *vsi, struct i40e_params *params) +{ + struct i40e_dcbx_config *dcb_cfg = &vsi->back->hw.local_dcbx_config; + int i = 0; + + for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) { + u8 tc = dcb_cfg->etscfg.prioritytable[i]; + u16 qs_handle; + + /* If TC is not enabled for VSI use TC0 for UP */ + if (!(vsi->tc_config.enabled_tc & BIT(tc))) + tc = 0; + + qs_handle = le16_to_cpu(vsi->info.qs_handle[tc]); + params->qos.prio_qos[i].tc = tc; + params->qos.prio_qos[i].qs_handle = qs_handle; + if (qs_handle == I40E_AQ_VSI_QS_HANDLE_INVALID) { + dev_err(&vsi->back->pdev->dev, "Invalid queue set handle for TC = %d, vsi id = %d\n", + tc, vsi->id); + return -EINVAL; + } + } + + params->mtu = vsi->netdev->mtu; + return 0; +} + +/** + * i40e_notify_client_of_vf_msg - call the client vf message callback + * @vsi: the VSI with the message + * @vf_id: the absolute VF id that sent the message + * @msg: message buffer + * @len: length of the message + * + * If there is a client to this VSI, call the client + **/ +void +i40e_notify_client_of_vf_msg(struct i40e_vsi *vsi, u32 vf_id, u8 *msg, u16 len) +{ + struct i40e_client_instance *cdev; + + if (!vsi) + return; + mutex_lock(&i40e_client_instance_mutex); + list_for_each_entry(cdev, &i40e_client_instances, list) { + if (cdev->lan_info.pf == vsi->back) { + if (!cdev->client || + !cdev->client->ops || + !cdev->client->ops->virtchnl_receive) { + dev_dbg(&vsi->back->pdev->dev, + "Cannot locate client instance virtual channel receive routine\n"); + continue; + } + cdev->client->ops->virtchnl_receive(&cdev->lan_info, + cdev->client, + vf_id, msg, len); + } + } + mutex_unlock(&i40e_client_instance_mutex); +} + +/** + * i40e_notify_client_of_l2_param_changes - call the client notify callback + * @vsi: the VSI with l2 param changes + * + * If there is a client to this VSI, call the client + **/ +void i40e_notify_client_of_l2_param_changes(struct i40e_vsi *vsi) +{ + struct i40e_client_instance *cdev; + struct i40e_params params; + + if (!vsi) + return; + memset(¶ms, 0, sizeof(params)); + i40e_client_get_params(vsi, ¶ms); + mutex_lock(&i40e_client_instance_mutex); + list_for_each_entry(cdev, &i40e_client_instances, list) { + if (cdev->lan_info.pf == vsi->back) { + if (!cdev->client || + !cdev->client->ops || + !cdev->client->ops->l2_param_change) { + dev_dbg(&vsi->back->pdev->dev, + "Cannot locate client instance l2_param_change routine\n"); + continue; + } + cdev->lan_info.params = params; + cdev->client->ops->l2_param_change(&cdev->lan_info, + cdev->client, + ¶ms); + } + } + mutex_unlock(&i40e_client_instance_mutex); +} + +/** + * i40e_notify_client_of_netdev_open - call the client open callback + * @vsi: the VSI with netdev opened + * + * If there is a client to this netdev, call the client with open + **/ +void i40e_notify_client_of_netdev_open(struct i40e_vsi *vsi) +{ + struct i40e_client_instance *cdev; + + if (!vsi) + return; + mutex_lock(&i40e_client_instance_mutex); + list_for_each_entry(cdev, &i40e_client_instances, list) { + if (cdev->lan_info.netdev == vsi->netdev) { + if (!cdev->client || + !cdev->client->ops || !cdev->client->ops->open) { + dev_dbg(&vsi->back->pdev->dev, + "Cannot locate client instance open routine\n"); + continue; + } + cdev->client->ops->open(&cdev->lan_info, cdev->client); + } + } + mutex_unlock(&i40e_client_instance_mutex); +} + +/** + * i40e_client_release_qvlist + * @ldev: pointer to L2 context. + * + **/ +static void i40e_client_release_qvlist(struct i40e_info *ldev) +{ + struct i40e_qvlist_info *qvlist_info = ldev->qvlist_info; + u32 i; + + if (!ldev->qvlist_info) + return; + + for (i = 0; i < qvlist_info->num_vectors; i++) { + struct i40e_pf *pf = ldev->pf; + struct i40e_qv_info *qv_info; + u32 reg_idx; + + qv_info = &qvlist_info->qv_info[i]; + if (!qv_info) + continue; + reg_idx = I40E_PFINT_LNKLSTN(qv_info->v_idx - 1); + wr32(&pf->hw, reg_idx, I40E_PFINT_LNKLSTN_FIRSTQ_INDX_MASK); + } + kfree(ldev->qvlist_info); + ldev->qvlist_info = NULL; +} + +/** + * i40e_notify_client_of_netdev_close - call the client close callback + * @vsi: the VSI with netdev closed + * @reset: true when close called due to a reset pending + * + * If there is a client to this netdev, call the client with close + **/ +void i40e_notify_client_of_netdev_close(struct i40e_vsi *vsi, bool reset) +{ + struct i40e_client_instance *cdev; + + if (!vsi) + return; + mutex_lock(&i40e_client_instance_mutex); + list_for_each_entry(cdev, &i40e_client_instances, list) { + if (cdev->lan_info.netdev == vsi->netdev) { + if (!cdev->client || + !cdev->client->ops || !cdev->client->ops->close) { + dev_dbg(&vsi->back->pdev->dev, + "Cannot locate client instance close routine\n"); + continue; + } + cdev->client->ops->close(&cdev->lan_info, cdev->client, + reset); + i40e_client_release_qvlist(&cdev->lan_info); + } + } + mutex_unlock(&i40e_client_instance_mutex); +} + +/** + * i40e_notify_client_of_vf_reset - call the client vf reset callback + * @pf: PF device pointer + * @vf_id: asolute id of VF being reset + * + * If there is a client attached to this PF, notify when a VF is reset + **/ +void i40e_notify_client_of_vf_reset(struct i40e_pf *pf, u32 vf_id) +{ + struct i40e_client_instance *cdev; + + if (!pf) + return; + mutex_lock(&i40e_client_instance_mutex); + list_for_each_entry(cdev, &i40e_client_instances, list) { + if (cdev->lan_info.pf == pf) { + if (!cdev->client || + !cdev->client->ops || + !cdev->client->ops->vf_reset) { + dev_dbg(&pf->pdev->dev, + "Cannot locate client instance VF reset routine\n"); + continue; + } + cdev->client->ops->vf_reset(&cdev->lan_info, + cdev->client, vf_id); + } + } + mutex_unlock(&i40e_client_instance_mutex); +} + +/** + * i40e_notify_client_of_vf_enable - call the client vf notification callback + * @pf: PF device pointer + * @num_vfs: the number of VFs currently enabled, 0 for disable + * + * If there is a client attached to this PF, call its VF notification routine + **/ +void i40e_notify_client_of_vf_enable(struct i40e_pf *pf, u32 num_vfs) +{ + struct i40e_client_instance *cdev; + + if (!pf) + return; + mutex_lock(&i40e_client_instance_mutex); + list_for_each_entry(cdev, &i40e_client_instances, list) { + if (cdev->lan_info.pf == pf) { + if (!cdev->client || + !cdev->client->ops || + !cdev->client->ops->vf_enable) { + dev_dbg(&pf->pdev->dev, + "Cannot locate client instance VF enable routine\n"); + continue; + } + cdev->client->ops->vf_enable(&cdev->lan_info, + cdev->client, num_vfs); + } + } + mutex_unlock(&i40e_client_instance_mutex); +} + +/** + * i40e_vf_client_capable - ask the client if it likes the specified VF + * @pf: PF device pointer + * @vf_id: the VF in question + * + * If there is a client of the specified type attached to this PF, call + * its vf_capable routine + **/ +int i40e_vf_client_capable(struct i40e_pf *pf, u32 vf_id, + enum i40e_client_type type) +{ + struct i40e_client_instance *cdev; + int capable = false; + + if (!pf) + return false; + mutex_lock(&i40e_client_instance_mutex); + list_for_each_entry(cdev, &i40e_client_instances, list) { + if (cdev->lan_info.pf == pf) { + if (!cdev->client || + !cdev->client->ops || + !cdev->client->ops->vf_capable || + !(cdev->client->type == type)) { + dev_dbg(&pf->pdev->dev, + "Cannot locate client instance VF capability routine\n"); + continue; + } + capable = cdev->client->ops->vf_capable(&cdev->lan_info, + cdev->client, + vf_id); + break; + } + } + mutex_unlock(&i40e_client_instance_mutex); + return capable; +} + +/** + * i40e_vsi_lookup - finds a matching VSI from the PF list starting at start_vsi + * @pf: board private structure + * @type: vsi type + * @start_vsi: a VSI pointer from where to start the search + * + * Returns non NULL on success or NULL for failure + **/ +struct i40e_vsi *i40e_vsi_lookup(struct i40e_pf *pf, + enum i40e_vsi_type type, + struct i40e_vsi *start_vsi) +{ + struct i40e_vsi *vsi; + int i = 0; + + if (start_vsi) { + for (i = 0; i < pf->num_alloc_vsi; i++) { + vsi = pf->vsi[i]; + if (vsi == start_vsi) + break; + } + } + for (; i < pf->num_alloc_vsi; i++) { + vsi = pf->vsi[i]; + if (vsi && vsi->type == type) + return vsi; + } + + return NULL; +} + +/** + * i40e_client_add_instance - add a client instance struct to the instance list + * @pf: pointer to the board struct + * @client: pointer to a client struct in the client list. + * + * Returns cdev ptr on success, NULL on failure + **/ +static +struct i40e_client_instance *i40e_client_add_instance(struct i40e_pf *pf, + struct i40e_client *client) +{ + struct i40e_client_instance *cdev; + struct netdev_hw_addr *mac = NULL; + struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; + + mutex_lock(&i40e_client_instance_mutex); + list_for_each_entry(cdev, &i40e_client_instances, list) { + if ((cdev->lan_info.pf == pf) && (cdev->client == client)) { + cdev = NULL; + goto out; + } + } + cdev = kzalloc(sizeof(*cdev), GFP_KERNEL); + if (!cdev) + goto out; + + cdev->lan_info.pf = (void *)pf; + cdev->lan_info.netdev = vsi->netdev; + cdev->lan_info.pcidev = pf->pdev; + cdev->lan_info.fid = pf->hw.pf_id; + cdev->lan_info.ftype = I40E_CLIENT_FTYPE_PF; + cdev->lan_info.hw_addr = pf->hw.hw_addr; + cdev->lan_info.ops = &i40e_lan_ops; + cdev->lan_info.version.major = I40E_CLIENT_VERSION_MAJOR; + cdev->lan_info.version.minor = I40E_CLIENT_VERSION_MINOR; + cdev->lan_info.version.build = I40E_CLIENT_VERSION_BUILD; + cdev->lan_info.fw_maj_ver = pf->hw.aq.fw_maj_ver; + cdev->lan_info.fw_min_ver = pf->hw.aq.fw_min_ver; + cdev->lan_info.fw_build = pf->hw.aq.fw_build; + set_bit(__I40E_CLIENT_INSTANCE_NONE, &cdev->state); + + if (i40e_client_get_params(vsi, &cdev->lan_info.params)) { + kfree(cdev); + cdev = NULL; + goto out; + } + + cdev->lan_info.msix_count = pf->num_iwarp_msix; + cdev->lan_info.msix_entries = &pf->msix_entries[pf->iwarp_base_vector]; + + mac = list_first_entry(&cdev->lan_info.netdev->dev_addrs.list, + struct netdev_hw_addr, list); + if (mac) + ether_addr_copy(cdev->lan_info.lanmac, mac->addr); + else + dev_err(&pf->pdev->dev, "MAC address list is empty!\n"); + + cdev->client = client; + INIT_LIST_HEAD(&cdev->list); + list_add(&cdev->list, &i40e_client_instances); +out: + mutex_unlock(&i40e_client_instance_mutex); + return cdev; +} + +/** + * i40e_client_del_instance - removes a client instance from the list + * @pf: pointer to the board struct + * + * Returns 0 on success or non-0 on error + **/ +static +int i40e_client_del_instance(struct i40e_pf *pf, struct i40e_client *client) +{ + struct i40e_client_instance *cdev, *tmp; + int ret = -ENODEV; + + mutex_lock(&i40e_client_instance_mutex); + list_for_each_entry_safe(cdev, tmp, &i40e_client_instances, list) { + if ((cdev->lan_info.pf != pf) || (cdev->client != client)) + continue; + + dev_info(&pf->pdev->dev, "Deleted instance of Client %s, of dev %d bus=0x%02x func=0x%02x)\n", + client->name, pf->hw.pf_id, + pf->hw.bus.device, pf->hw.bus.func); + list_del(&cdev->list); + kfree(cdev); + ret = 0; + break; + } + mutex_unlock(&i40e_client_instance_mutex); + return ret; +} + +/** + * i40e_client_subtask - client maintenance work + * @pf: board private structure + **/ +void i40e_client_subtask(struct i40e_pf *pf) +{ + struct i40e_client_instance *cdev; + struct i40e_client *client; + int ret = 0; + + if (!(pf->flags & I40E_FLAG_SERVICE_CLIENT_REQUESTED)) + return; + pf->flags &= ~I40E_FLAG_SERVICE_CLIENT_REQUESTED; + + /* If we're down or resetting, just bail */ + if (test_bit(__I40E_DOWN, &pf->state) || + test_bit(__I40E_CONFIG_BUSY, &pf->state)) + return; + + /* Check client state and instantiate client if client registered */ + mutex_lock(&i40e_client_mutex); + list_for_each_entry(client, &i40e_clients, list) { + /* first check client is registered */ + if (!test_bit(__I40E_CLIENT_REGISTERED, &client->state)) + continue; + + /* Do we also need the LAN VSI to be up, to create instance */ + if (!(client->flags & I40E_CLIENT_FLAGS_LAUNCH_ON_PROBE)) { + /* check if L2 VSI is up, if not we are not ready */ + if (test_bit(__I40E_DOWN, &pf->vsi[pf->lan_vsi]->state)) + continue; + } + + /* Add the client instance to the instance list */ + cdev = i40e_client_add_instance(pf, client); + if (!cdev) + continue; + + /* Also up the ref_cnt of no. of instances of this client */ + atomic_inc(&client->ref_cnt); + dev_info(&pf->pdev->dev, "Added instance of Client %s to PF%d bus=0x%02x func=0x%02x\n", + client->name, pf->hw.pf_id, + pf->hw.bus.device, pf->hw.bus.func); + + /* Send an Open request to the client */ + atomic_inc(&cdev->ref_cnt); + if (client->ops && client->ops->open) + ret = client->ops->open(&cdev->lan_info, client); + atomic_dec(&cdev->ref_cnt); + if (!ret) { + set_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state); + } else { + /* remove client instance */ + i40e_client_del_instance(pf, client); + atomic_dec(&client->ref_cnt); + continue; + } + } + mutex_unlock(&i40e_client_mutex); +} + +/** + * i40e_lan_add_device - add a lan device struct to the list of lan devices + * @pf: pointer to the board struct + * + * Returns 0 on success or none 0 on error + **/ +int i40e_lan_add_device(struct i40e_pf *pf) +{ + struct i40e_device *ldev; + int ret = 0; + + mutex_lock(&i40e_device_mutex); + list_for_each_entry(ldev, &i40e_devices, list) { + if (ldev->pf == pf) { + ret = -EEXIST; + goto out; + } + } + ldev = kzalloc(sizeof(*ldev), GFP_KERNEL); + if (!ldev) { + ret = -ENOMEM; + goto out; + } + ldev->pf = pf; + INIT_LIST_HEAD(&ldev->list); + list_add(&ldev->list, &i40e_devices); + dev_info(&pf->pdev->dev, "Added LAN device PF%d bus=0x%02x func=0x%02x\n", + pf->hw.pf_id, pf->hw.bus.device, pf->hw.bus.func); + + /* Since in some cases register may have happened before a device gets + * added, we can schedule a subtask to go initiate the clients. + */ + pf->flags |= I40E_FLAG_SERVICE_CLIENT_REQUESTED; + i40e_service_event_schedule(pf); + +out: + mutex_unlock(&i40e_device_mutex); + return ret; +} + +/** + * i40e_lan_del_device - removes a lan device from the device list + * @pf: pointer to the board struct + * + * Returns 0 on success or non-0 on error + **/ +int i40e_lan_del_device(struct i40e_pf *pf) +{ + struct i40e_device *ldev, *tmp; + int ret = -ENODEV; + + mutex_lock(&i40e_device_mutex); + list_for_each_entry_safe(ldev, tmp, &i40e_devices, list) { + if (ldev->pf == pf) { + dev_info(&pf->pdev->dev, "Deleted LAN device PF%d bus=0x%02x func=0x%02x\n", + pf->hw.pf_id, pf->hw.bus.device, + pf->hw.bus.func); + list_del(&ldev->list); + kfree(ldev); + ret = 0; + break; + } + } + + mutex_unlock(&i40e_device_mutex); + return ret; +} + +/** + * i40e_client_release - release client specific resources + * @client: pointer to the registered client + * + * Return 0 on success or < 0 on error + **/ +static int i40e_client_release(struct i40e_client *client) +{ + struct i40e_client_instance *cdev, *tmp; + struct i40e_pf *pf = NULL; + int ret = 0; + + LIST_HEAD(cdevs_tmp); + + mutex_lock(&i40e_client_instance_mutex); + list_for_each_entry_safe(cdev, tmp, &i40e_client_instances, list) { + if (strncmp(cdev->client->name, client->name, + I40E_CLIENT_STR_LENGTH)) + continue; + if (test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state)) { + if (atomic_read(&cdev->ref_cnt) > 0) { + ret = I40E_ERR_NOT_READY; + goto out; + } + pf = (struct i40e_pf *)cdev->lan_info.pf; + if (client->ops && client->ops->close) + client->ops->close(&cdev->lan_info, client, + false); + i40e_client_release_qvlist(&cdev->lan_info); + clear_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state); + + dev_warn(&pf->pdev->dev, + "Client %s instance for PF id %d closed\n", + client->name, pf->hw.pf_id); + } + /* delete the client instance from the list */ + list_del(&cdev->list); + list_add(&cdev->list, &cdevs_tmp); + atomic_dec(&client->ref_cnt); + dev_info(&pf->pdev->dev, "Deleted client instance of Client %s\n", + client->name); + } +out: + mutex_unlock(&i40e_client_instance_mutex); + + /* free the client device and release its vsi */ + list_for_each_entry_safe(cdev, tmp, &cdevs_tmp, list) { + kfree(cdev); + } + return ret; +} + +/** + * i40e_client_prepare - prepare client specific resources + * @client: pointer to the registered client + * + * Return 0 on success or < 0 on error + **/ +static int i40e_client_prepare(struct i40e_client *client) +{ + struct i40e_device *ldev; + struct i40e_pf *pf; + int ret = 0; + + mutex_lock(&i40e_device_mutex); + list_for_each_entry(ldev, &i40e_devices, list) { + pf = ldev->pf; + /* Start the client subtask */ + pf->flags |= I40E_FLAG_SERVICE_CLIENT_REQUESTED; + i40e_service_event_schedule(pf); + } + mutex_unlock(&i40e_device_mutex); + return ret; +} + +/** + * i40e_client_virtchnl_send - TBD + * @ldev: pointer to L2 context + * @client: Client pointer + * @vf_id: absolute VF identifier + * @msg: message buffer + * @len: length of message buffer + * + * Return 0 on success or < 0 on error + **/ +static int i40e_client_virtchnl_send(struct i40e_info *ldev, + struct i40e_client *client, + u32 vf_id, u8 *msg, u16 len) +{ + struct i40e_pf *pf = ldev->pf; + struct i40e_hw *hw = &pf->hw; + i40e_status err; + + err = i40e_aq_send_msg_to_vf(hw, vf_id, I40E_VIRTCHNL_OP_IWARP, + 0, msg, len, NULL); + if (err) + dev_err(&pf->pdev->dev, "Unable to send iWarp message to VF, error %d, aq status %d\n", + err, hw->aq.asq_last_status); + + return err; +} + +/** + * i40e_client_setup_qvlist + * @ldev: pointer to L2 context. + * @client: Client pointer. + * @qv_info: queue and vector list + * + * Return 0 on success or < 0 on error + **/ +static int i40e_client_setup_qvlist(struct i40e_info *ldev, + struct i40e_client *client, + struct i40e_qvlist_info *qvlist_info) +{ + struct i40e_pf *pf = ldev->pf; + struct i40e_hw *hw = &pf->hw; + struct i40e_qv_info *qv_info; + u32 v_idx, i, reg_idx, reg; + u32 size; + + size = sizeof(struct i40e_qvlist_info) + + (sizeof(struct i40e_qv_info) * (qvlist_info->num_vectors - 1)); + ldev->qvlist_info = kzalloc(size, GFP_KERNEL); + ldev->qvlist_info->num_vectors = qvlist_info->num_vectors; + + for (i = 0; i < qvlist_info->num_vectors; i++) { + qv_info = &qvlist_info->qv_info[i]; + if (!qv_info) + continue; + v_idx = qv_info->v_idx; + + /* Validate vector id belongs to this client */ + if ((v_idx >= (pf->iwarp_base_vector + pf->num_iwarp_msix)) || + (v_idx < pf->iwarp_base_vector)) + goto err; + + ldev->qvlist_info->qv_info[i] = *qv_info; + reg_idx = I40E_PFINT_LNKLSTN(v_idx - 1); + + if (qv_info->ceq_idx == I40E_QUEUE_INVALID_IDX) { + /* Special case - No CEQ mapped on this vector */ + wr32(hw, reg_idx, I40E_PFINT_LNKLSTN_FIRSTQ_INDX_MASK); + } else { + reg = (qv_info->ceq_idx & + I40E_PFINT_LNKLSTN_FIRSTQ_INDX_MASK) | + (I40E_QUEUE_TYPE_PE_CEQ << + I40E_PFINT_LNKLSTN_FIRSTQ_TYPE_SHIFT); + wr32(hw, reg_idx, reg); + + reg = (I40E_PFINT_CEQCTL_CAUSE_ENA_MASK | + (v_idx << I40E_PFINT_CEQCTL_MSIX_INDX_SHIFT) | + (qv_info->itr_idx << + I40E_PFINT_CEQCTL_ITR_INDX_SHIFT) | + (I40E_QUEUE_END_OF_LIST << + I40E_PFINT_CEQCTL_NEXTQ_INDX_SHIFT)); + wr32(hw, I40E_PFINT_CEQCTL(qv_info->ceq_idx), reg); + } + if (qv_info->aeq_idx != I40E_QUEUE_INVALID_IDX) { + reg = (I40E_PFINT_AEQCTL_CAUSE_ENA_MASK | + (v_idx << I40E_PFINT_AEQCTL_MSIX_INDX_SHIFT) | + (qv_info->itr_idx << + I40E_PFINT_AEQCTL_ITR_INDX_SHIFT)); + + wr32(hw, I40E_PFINT_AEQCTL, reg); + } + } + + return 0; +err: + kfree(ldev->qvlist_info); + ldev->qvlist_info = NULL; + return -EINVAL; +} + +/** + * i40e_client_request_reset + * @ldev: pointer to L2 context. + * @client: Client pointer. + * @level: reset level + **/ +static void i40e_client_request_reset(struct i40e_info *ldev, + struct i40e_client *client, + u32 reset_level) +{ + struct i40e_pf *pf = ldev->pf; + + switch (reset_level) { + case I40E_CLIENT_RESET_LEVEL_PF: + set_bit(__I40E_PF_RESET_REQUESTED, &pf->state); + break; + case I40E_CLIENT_RESET_LEVEL_CORE: + set_bit(__I40E_PF_RESET_REQUESTED, &pf->state); + break; + default: + dev_warn(&pf->pdev->dev, + "Client %s instance for PF id %d request an unsupported reset: %d.\n", + client->name, pf->hw.pf_id, reset_level); + break; + } + + i40e_service_event_schedule(pf); +} + +/** + * i40e_client_update_vsi_ctxt + * @ldev: pointer to L2 context. + * @client: Client pointer. + * @is_vf: if this for the VF + * @vf_id: if is_vf true this carries the vf_id + * @flag: Any device level setting that needs to be done for PE + * @valid_flag: Bits in this match up and enable changing of flag bits + * + * Return 0 on success or < 0 on error + **/ +static int i40e_client_update_vsi_ctxt(struct i40e_info *ldev, + struct i40e_client *client, + bool is_vf, u32 vf_id, + u32 flag, u32 valid_flag) +{ + struct i40e_pf *pf = ldev->pf; + struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; + struct i40e_vsi_context ctxt; + bool update = true; + i40e_status err; + + /* TODO: for now do not allow setting VF's VSI setting */ + if (is_vf) + return -EINVAL; + + ctxt.seid = pf->main_vsi_seid; + ctxt.pf_num = pf->hw.pf_id; + err = i40e_aq_get_vsi_params(&pf->hw, &ctxt, NULL); + ctxt.flags = I40E_AQ_VSI_TYPE_PF; + if (err) { + dev_info(&pf->pdev->dev, + "couldn't get PF vsi config, err %s aq_err %s\n", + i40e_stat_str(&pf->hw, err), + i40e_aq_str(&pf->hw, + pf->hw.aq.asq_last_status)); + return -ENOENT; + } + + if ((valid_flag & I40E_CLIENT_VSI_FLAG_TCP_PACKET_ENABLE) && + (flag & I40E_CLIENT_VSI_FLAG_TCP_PACKET_ENABLE)) { + ctxt.info.valid_sections = + cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID); + ctxt.info.queueing_opt_flags |= I40E_AQ_VSI_QUE_OPT_TCP_ENA; + } else if ((valid_flag & I40E_CLIENT_VSI_FLAG_TCP_PACKET_ENABLE) && + !(flag & I40E_CLIENT_VSI_FLAG_TCP_PACKET_ENABLE)) { + ctxt.info.valid_sections = + cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID); + ctxt.info.queueing_opt_flags &= ~I40E_AQ_VSI_QUE_OPT_TCP_ENA; + } else { + update = false; + dev_warn(&pf->pdev->dev, + "Client %s instance for PF id %d request an unsupported Config: %x.\n", + client->name, pf->hw.pf_id, flag); + } + + if (update) { + err = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL); + if (err) { + dev_info(&pf->pdev->dev, + "update VSI ctxt for PE failed, err %s aq_err %s\n", + i40e_stat_str(&pf->hw, err), + i40e_aq_str(&pf->hw, + pf->hw.aq.asq_last_status)); + } + } + return err; +} + +/** + * i40e_register_client - Register a i40e client driver with the L2 driver + * @client: pointer to the i40e_client struct + * + * Returns 0 on success or non-0 on error + **/ +int i40e_register_client(struct i40e_client *client) +{ + int ret = 0; + enum i40e_vsi_type vsi_type; + + if (!client) { + ret = -EIO; + goto out; + } + + if (strlen(client->name) == 0) { + pr_info("i40e: Failed to register client with no name\n"); + ret = -EIO; + goto out; + } + + mutex_lock(&i40e_client_mutex); + if (i40e_client_is_registered(client)) { + pr_info("i40e: Client %s has already been registered!\n", + client->name); + mutex_unlock(&i40e_client_mutex); + ret = -EEXIST; + goto out; + } + + if ((client->version.major != I40E_CLIENT_VERSION_MAJOR) || + (client->version.minor != I40E_CLIENT_VERSION_MINOR)) { + pr_info("i40e: Failed to register client %s due to mismatched client interface version\n", + client->name); + pr_info("Client is using version: %02d.%02d.%02d while LAN driver supports %s\n", + client->version.major, client->version.minor, + client->version.build, + i40e_client_interface_version_str); + mutex_unlock(&i40e_client_mutex); + ret = -EIO; + goto out; + } + + vsi_type = i40e_client_type_to_vsi_type(client->type); + if (vsi_type == I40E_VSI_TYPE_UNKNOWN) { + pr_info("i40e: Failed to register client %s due to unknown client type %d\n", + client->name, client->type); + mutex_unlock(&i40e_client_mutex); + ret = -EIO; + goto out; + } + list_add(&client->list, &i40e_clients); + set_bit(__I40E_CLIENT_REGISTERED, &client->state); + mutex_unlock(&i40e_client_mutex); + + if (i40e_client_prepare(client)) { + ret = -EIO; + goto out; + } + + pr_info("i40e: Registered client %s with return code %d\n", + client->name, ret); +out: + return ret; +} +EXPORT_SYMBOL(i40e_register_client); + +/** + * i40e_unregister_client - Unregister a i40e client driver with the L2 driver + * @client: pointer to the i40e_client struct + * + * Returns 0 on success or non-0 on error + **/ +int i40e_unregister_client(struct i40e_client *client) +{ + int ret = 0; + + /* When a unregister request comes through we would have to send + * a close for each of the client instances that were opened. + * client_release function is called to handle this. + */ + if (!client || i40e_client_release(client)) { + ret = -EIO; + goto out; + } + + /* TODO: check if device is in reset, or if that matters? */ + mutex_lock(&i40e_client_mutex); + if (!i40e_client_is_registered(client)) { + pr_info("i40e: Client %s has not been registered\n", + client->name); + mutex_unlock(&i40e_client_mutex); + ret = -ENODEV; + goto out; + } + if (atomic_read(&client->ref_cnt) == 0) { + clear_bit(__I40E_CLIENT_REGISTERED, &client->state); + list_del(&client->list); + pr_info("i40e: Unregistered client %s with return code %d\n", + client->name, ret); + } else { + ret = I40E_ERR_NOT_READY; + pr_err("i40e: Client %s failed unregister - client has open instances\n", + client->name); + } + + mutex_unlock(&i40e_client_mutex); +out: + return ret; +} +EXPORT_SYMBOL(i40e_unregister_client); diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.h b/drivers/net/ethernet/intel/i40e/i40e_client.h new file mode 100644 index 000000000..bf6b453d9 --- /dev/null +++ b/drivers/net/ethernet/intel/i40e/i40e_client.h @@ -0,0 +1,232 @@ +/******************************************************************************* + * + * Intel Ethernet Controller XL710 Family Linux Driver + * Copyright(c) 2013 - 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program. If not, see <http://www.gnu.org/licenses/>. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Contact Information: + * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> + * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + * + ******************************************************************************/ + +#ifndef _I40E_CLIENT_H_ +#define _I40E_CLIENT_H_ + +#define I40E_CLIENT_STR_LENGTH 10 + +/* Client interface version should be updated anytime there is a change in the + * existing APIs or data structures. + */ +#define I40E_CLIENT_VERSION_MAJOR 0 +#define I40E_CLIENT_VERSION_MINOR 01 +#define I40E_CLIENT_VERSION_BUILD 00 +#define I40E_CLIENT_VERSION_STR \ + XSTRINGIFY(I40E_CLIENT_VERSION_MAJOR) "." \ + XSTRINGIFY(I40E_CLIENT_VERSION_MINOR) "." \ + XSTRINGIFY(I40E_CLIENT_VERSION_BUILD) + +struct i40e_client_version { + u8 major; + u8 minor; + u8 build; + u8 rsvd; +}; + +enum i40e_client_state { + __I40E_CLIENT_NULL, + __I40E_CLIENT_REGISTERED +}; + +enum i40e_client_instance_state { + __I40E_CLIENT_INSTANCE_NONE, + __I40E_CLIENT_INSTANCE_OPENED, +}; + +enum i40e_client_type { + I40E_CLIENT_IWARP, + I40E_CLIENT_VMDQ2 +}; + +struct i40e_ops; +struct i40e_client; + +/* HW does not define a type value for AEQ; only for RX/TX and CEQ. + * In order for us to keep the interface simple, SW will define a + * unique type value for AEQ. + */ +#define I40E_QUEUE_TYPE_PE_AEQ 0x80 +#define I40E_QUEUE_INVALID_IDX 0xFFFF + +struct i40e_qv_info { + u32 v_idx; /* msix_vector */ + u16 ceq_idx; + u16 aeq_idx; + u8 itr_idx; +}; + +struct i40e_qvlist_info { + u32 num_vectors; + struct i40e_qv_info qv_info[1]; +}; + +#define I40E_CLIENT_MSIX_ALL 0xFFFFFFFF + +/* set of LAN parameters useful for clients managed by LAN */ + +/* Struct to hold per priority info */ +struct i40e_prio_qos_params { + u16 qs_handle; /* qs handle for prio */ + u8 tc; /* TC mapped to prio */ + u8 reserved; +}; + +#define I40E_CLIENT_MAX_USER_PRIORITY 8 +/* Struct to hold Client QoS */ +struct i40e_qos_params { + struct i40e_prio_qos_params prio_qos[I40E_CLIENT_MAX_USER_PRIORITY]; +}; + +struct i40e_params { + struct i40e_qos_params qos; + u16 mtu; +}; + +/* Structure to hold Lan device info for a client device */ +struct i40e_info { + struct i40e_client_version version; + u8 lanmac[6]; + struct net_device *netdev; + struct pci_dev *pcidev; + u8 __iomem *hw_addr; + u8 fid; /* function id, PF id or VF id */ +#define I40E_CLIENT_FTYPE_PF 0 +#define I40E_CLIENT_FTYPE_VF 1 + u8 ftype; /* function type, PF or VF */ + void *pf; + + /* All L2 params that could change during the life span of the PF + * and needs to be communicated to the client when they change + */ + struct i40e_qvlist_info *qvlist_info; + struct i40e_params params; + struct i40e_ops *ops; + + u16 msix_count; /* number of msix vectors*/ + /* Array down below will be dynamically allocated based on msix_count */ + struct msix_entry *msix_entries; + u16 itr_index; /* Which ITR index the PE driver is suppose to use */ + u16 fw_maj_ver; /* firmware major version */ + u16 fw_min_ver; /* firmware minor version */ + u32 fw_build; /* firmware build number */ +}; + +#define I40E_CLIENT_RESET_LEVEL_PF 1 +#define I40E_CLIENT_RESET_LEVEL_CORE 2 +#define I40E_CLIENT_VSI_FLAG_TCP_PACKET_ENABLE BIT(1) + +struct i40e_ops { + /* setup_q_vector_list enables queues with a particular vector */ + int (*setup_qvlist)(struct i40e_info *ldev, struct i40e_client *client, + struct i40e_qvlist_info *qv_info); + + int (*virtchnl_send)(struct i40e_info *ldev, struct i40e_client *client, + u32 vf_id, u8 *msg, u16 len); + + /* If the PE Engine is unresponsive, RDMA driver can request a reset. + * The level helps determine the level of reset being requested. + */ + void (*request_reset)(struct i40e_info *ldev, + struct i40e_client *client, u32 level); + + /* API for the RDMA driver to set certain VSI flags that control + * PE Engine. + */ + int (*update_vsi_ctxt)(struct i40e_info *ldev, + struct i40e_client *client, + bool is_vf, u32 vf_id, + u32 flag, u32 valid_flag); +}; + +struct i40e_client_ops { + /* Should be called from register_client() or whenever PF is ready + * to create a specific client instance. + */ + int (*open)(struct i40e_info *ldev, struct i40e_client *client); + + /* Should be called when netdev is unavailable or when unregister + * call comes in. If the close is happenening due to a reset being + * triggered set the reset bit to true. + */ + void (*close)(struct i40e_info *ldev, struct i40e_client *client, + bool reset); + + /* called when some l2 managed parameters changes - mtu */ + void (*l2_param_change)(struct i40e_info *ldev, + struct i40e_client *client, + struct i40e_params *params); + + int (*virtchnl_receive)(struct i40e_info *ldev, + struct i40e_client *client, u32 vf_id, + u8 *msg, u16 len); + + /* called when a VF is reset by the PF */ + void (*vf_reset)(struct i40e_info *ldev, + struct i40e_client *client, u32 vf_id); + + /* called when the number of VFs changes */ + void (*vf_enable)(struct i40e_info *ldev, + struct i40e_client *client, u32 num_vfs); + + /* returns true if VF is capable of specified offload */ + int (*vf_capable)(struct i40e_info *ldev, + struct i40e_client *client, u32 vf_id); +}; + +/* Client device */ +struct i40e_client_instance { + struct list_head list; + struct i40e_info lan_info; + struct i40e_client *client; + unsigned long state; + /* A count of all the in-progress calls to the client */ + atomic_t ref_cnt; +}; + +struct i40e_client { + struct list_head list; /* list of registered clients */ + char name[I40E_CLIENT_STR_LENGTH]; + struct i40e_client_version version; + unsigned long state; /* client state */ + atomic_t ref_cnt; /* Count of all the client devices of this kind */ + u32 flags; +#define I40E_CLIENT_FLAGS_LAUNCH_ON_PROBE BIT(0) +#define I40E_TX_FLAGS_NOTIFY_OTHER_EVENTS BIT(2) + enum i40e_client_type type; + struct i40e_client_ops *ops; /* client ops provided by the client */ +}; + +static inline bool i40e_client_is_registered(struct i40e_client *client) +{ + return test_bit(__I40E_CLIENT_REGISTERED, &client->state); +} + +/* used by clients */ +int i40e_register_client(struct i40e_client *client); +int i40e_unregister_client(struct i40e_client *client); + +#endif /* _I40E_CLIENT_H_ */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index 6a034ddac..4596294c2 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2015 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -55,19 +55,13 @@ static i40e_status i40e_set_mac_type(struct i40e_hw *hw) case I40E_DEV_ID_20G_KR2_A: hw->mac.type = I40E_MAC_XL710; break; + case I40E_DEV_ID_KX_X722: + case I40E_DEV_ID_QSFP_X722: case I40E_DEV_ID_SFP_X722: case I40E_DEV_ID_1G_BASE_T_X722: case I40E_DEV_ID_10G_BASE_T_X722: hw->mac.type = I40E_MAC_X722; break; - case I40E_DEV_ID_X722_VF: - case I40E_DEV_ID_X722_VF_HV: - hw->mac.type = I40E_MAC_X722_VF; - break; - case I40E_DEV_ID_VF: - case I40E_DEV_ID_VF_HV: - hw->mac.type = I40E_MAC_VF; - break; default: hw->mac.type = I40E_MAC_GENERIC; break; @@ -1245,7 +1239,13 @@ i40e_status i40e_pf_reset(struct i40e_hw *hw) grst_del = (rd32(hw, I40E_GLGEN_RSTCTL) & I40E_GLGEN_RSTCTL_GRSTDEL_MASK) >> I40E_GLGEN_RSTCTL_GRSTDEL_SHIFT; - for (cnt = 0; cnt < grst_del + 10; cnt++) { + + /* It can take upto 15 secs for GRST steady state. + * Bump it to 16 secs max to be safe. + */ + grst_del = grst_del * 20; + + for (cnt = 0; cnt < grst_del; cnt++) { reg = rd32(hw, I40E_GLGEN_RSTAT); if (!(reg & I40E_GLGEN_RSTAT_DEVSTATE_MASK)) break; @@ -1893,6 +1893,32 @@ i40e_status i40e_aq_set_phy_int_mask(struct i40e_hw *hw, return status; } +/** + * i40e_aq_set_phy_debug + * @hw: pointer to the hw struct + * @cmd_flags: debug command flags + * @cmd_details: pointer to command details structure or NULL + * + * Reset the external PHY. + **/ +enum i40e_status_code i40e_aq_set_phy_debug(struct i40e_hw *hw, u8 cmd_flags, + struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_set_phy_debug *cmd = + (struct i40e_aqc_set_phy_debug *)&desc.params.raw; + enum i40e_status_code status; + + i40e_fill_default_direct_cmd_desc(&desc, + i40e_aqc_opc_set_phy_debug); + + cmd->command_flags = cmd_flags; + + status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); + + return status; +} + /** * i40e_aq_add_vsi * @hw: pointer to the hw struct @@ -1958,12 +1984,19 @@ i40e_status i40e_aq_set_vsi_unicast_promiscuous(struct i40e_hw *hw, i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_set_vsi_promiscuous_modes); - if (set) + if (set) { flags |= I40E_AQC_SET_VSI_PROMISC_UNICAST; + if (((hw->aq.api_maj_ver == 1) && (hw->aq.api_min_ver >= 5)) || + (hw->aq.api_maj_ver > 1)) + flags |= I40E_AQC_SET_VSI_PROMISC_TX; + } cmd->promiscuous_flags = cpu_to_le16(flags); cmd->valid_flags = cpu_to_le16(I40E_AQC_SET_VSI_PROMISC_UNICAST); + if (((hw->aq.api_maj_ver >= 1) && (hw->aq.api_min_ver >= 5)) || + (hw->aq.api_maj_ver > 1)) + cmd->valid_flags |= cpu_to_le16(I40E_AQC_SET_VSI_PROMISC_TX); cmd->seid = cpu_to_le16(seid); status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); @@ -2038,6 +2071,37 @@ i40e_status i40e_aq_set_vsi_broadcast(struct i40e_hw *hw, return status; } +/** + * i40e_aq_set_vsi_vlan_promisc - control the VLAN promiscuous setting + * @hw: pointer to the hw struct + * @seid: vsi number + * @enable: set MAC L2 layer unicast promiscuous enable/disable for a given VLAN + * @cmd_details: pointer to command details structure or NULL + **/ +i40e_status i40e_aq_set_vsi_vlan_promisc(struct i40e_hw *hw, + u16 seid, bool enable, + struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_set_vsi_promiscuous_modes *cmd = + (struct i40e_aqc_set_vsi_promiscuous_modes *)&desc.params.raw; + i40e_status status; + u16 flags = 0; + + i40e_fill_default_direct_cmd_desc(&desc, + i40e_aqc_opc_set_vsi_promiscuous_modes); + if (enable) + flags |= I40E_AQC_SET_VSI_PROMISC_VLAN; + + cmd->promiscuous_flags = cpu_to_le16(flags); + cmd->valid_flags = cpu_to_le16(I40E_AQC_SET_VSI_PROMISC_VLAN); + cmd->seid = cpu_to_le16(seid); + + status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); + + return status; +} + /** * i40e_get_vsi_params - get VSI configuration info * @hw: pointer to the hw struct @@ -2283,8 +2347,8 @@ i40e_status i40e_update_link_info(struct i40e_hw *hw) * @downlink_seid: the VSI SEID * @enabled_tc: bitmap of TCs to be enabled * @default_port: true for default port VSI, false for control port - * @enable_l2_filtering: true to add L2 filter table rules to regular forwarding rules for cloud support * @veb_seid: pointer to where to put the resulting VEB SEID + * @enable_stats: true to turn on VEB stats * @cmd_details: pointer to command details structure or NULL * * This asks the FW to add a VEB between the uplink and downlink @@ -2292,8 +2356,8 @@ i40e_status i40e_update_link_info(struct i40e_hw *hw) **/ i40e_status i40e_aq_add_veb(struct i40e_hw *hw, u16 uplink_seid, u16 downlink_seid, u8 enabled_tc, - bool default_port, bool enable_l2_filtering, - u16 *veb_seid, + bool default_port, u16 *veb_seid, + bool enable_stats, struct i40e_asq_cmd_details *cmd_details) { struct i40e_aq_desc desc; @@ -2320,8 +2384,9 @@ i40e_status i40e_aq_add_veb(struct i40e_hw *hw, u16 uplink_seid, else veb_flags |= I40E_AQC_ADD_VEB_PORT_TYPE_DATA; - if (enable_l2_filtering) - veb_flags |= I40E_AQC_ADD_VEB_ENABLE_L2_FILTER; + /* reverse logic here: set the bitflag to disable the stats */ + if (!enable_stats) + veb_flags |= I40E_AQC_ADD_VEB_ENABLE_DISABLE_STATS; cmd->veb_flags = cpu_to_le16(veb_flags); @@ -2410,6 +2475,7 @@ i40e_status i40e_aq_add_macvlan(struct i40e_hw *hw, u16 seid, (struct i40e_aqc_macvlan *)&desc.params.raw; i40e_status status; u16 buf_size; + int i; if (count == 0 || !mv_list || !hw) return I40E_ERR_PARAM; @@ -2423,12 +2489,17 @@ i40e_status i40e_aq_add_macvlan(struct i40e_hw *hw, u16 seid, cmd->seid[1] = 0; cmd->seid[2] = 0; + for (i = 0; i < count; i++) + if (is_multicast_ether_addr(mv_list[i].mac_addr)) + mv_list[i].flags |= + cpu_to_le16(I40E_AQC_MACVLAN_ADD_USE_SHARED_MAC); + desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD)); if (buf_size > I40E_AQ_LARGE_BUF) desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB); status = i40e_asq_send_command(hw, &desc, mv_list, buf_size, - cmd_details); + cmd_details); return status; } @@ -2475,6 +2546,137 @@ i40e_status i40e_aq_remove_macvlan(struct i40e_hw *hw, u16 seid, return status; } +/** + * i40e_mirrorrule_op - Internal helper function to add/delete mirror rule + * @hw: pointer to the hw struct + * @opcode: AQ opcode for add or delete mirror rule + * @sw_seid: Switch SEID (to which rule refers) + * @rule_type: Rule Type (ingress/egress/VLAN) + * @id: Destination VSI SEID or Rule ID + * @count: length of the list + * @mr_list: list of mirrored VSI SEIDs or VLAN IDs + * @cmd_details: pointer to command details structure or NULL + * @rule_id: Rule ID returned from FW + * @rule_used: Number of rules used in internal switch + * @rule_free: Number of rules free in internal switch + * + * Add/Delete a mirror rule to a specific switch. Mirror rules are supported for + * VEBs/VEPA elements only + **/ +static i40e_status i40e_mirrorrule_op(struct i40e_hw *hw, + u16 opcode, u16 sw_seid, u16 rule_type, u16 id, + u16 count, __le16 *mr_list, + struct i40e_asq_cmd_details *cmd_details, + u16 *rule_id, u16 *rules_used, u16 *rules_free) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_add_delete_mirror_rule *cmd = + (struct i40e_aqc_add_delete_mirror_rule *)&desc.params.raw; + struct i40e_aqc_add_delete_mirror_rule_completion *resp = + (struct i40e_aqc_add_delete_mirror_rule_completion *)&desc.params.raw; + i40e_status status; + u16 buf_size; + + buf_size = count * sizeof(*mr_list); + + /* prep the rest of the request */ + i40e_fill_default_direct_cmd_desc(&desc, opcode); + cmd->seid = cpu_to_le16(sw_seid); + cmd->rule_type = cpu_to_le16(rule_type & + I40E_AQC_MIRROR_RULE_TYPE_MASK); + cmd->num_entries = cpu_to_le16(count); + /* Dest VSI for add, rule_id for delete */ + cmd->destination = cpu_to_le16(id); + if (mr_list) { + desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | + I40E_AQ_FLAG_RD)); + if (buf_size > I40E_AQ_LARGE_BUF) + desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB); + } + + status = i40e_asq_send_command(hw, &desc, mr_list, buf_size, + cmd_details); + if (!status || + hw->aq.asq_last_status == I40E_AQ_RC_ENOSPC) { + if (rule_id) + *rule_id = le16_to_cpu(resp->rule_id); + if (rules_used) + *rules_used = le16_to_cpu(resp->mirror_rules_used); + if (rules_free) + *rules_free = le16_to_cpu(resp->mirror_rules_free); + } + return status; +} + +/** + * i40e_aq_add_mirrorrule - add a mirror rule + * @hw: pointer to the hw struct + * @sw_seid: Switch SEID (to which rule refers) + * @rule_type: Rule Type (ingress/egress/VLAN) + * @dest_vsi: SEID of VSI to which packets will be mirrored + * @count: length of the list + * @mr_list: list of mirrored VSI SEIDs or VLAN IDs + * @cmd_details: pointer to command details structure or NULL + * @rule_id: Rule ID returned from FW + * @rule_used: Number of rules used in internal switch + * @rule_free: Number of rules free in internal switch + * + * Add mirror rule. Mirror rules are supported for VEBs or VEPA elements only + **/ +i40e_status i40e_aq_add_mirrorrule(struct i40e_hw *hw, u16 sw_seid, + u16 rule_type, u16 dest_vsi, u16 count, __le16 *mr_list, + struct i40e_asq_cmd_details *cmd_details, + u16 *rule_id, u16 *rules_used, u16 *rules_free) +{ + if (!(rule_type == I40E_AQC_MIRROR_RULE_TYPE_ALL_INGRESS || + rule_type == I40E_AQC_MIRROR_RULE_TYPE_ALL_EGRESS)) { + if (count == 0 || !mr_list) + return I40E_ERR_PARAM; + } + + return i40e_mirrorrule_op(hw, i40e_aqc_opc_add_mirror_rule, sw_seid, + rule_type, dest_vsi, count, mr_list, + cmd_details, rule_id, rules_used, rules_free); +} + +/** + * i40e_aq_delete_mirrorrule - delete a mirror rule + * @hw: pointer to the hw struct + * @sw_seid: Switch SEID (to which rule refers) + * @rule_type: Rule Type (ingress/egress/VLAN) + * @count: length of the list + * @rule_id: Rule ID that is returned in the receive desc as part of + * add_mirrorrule. + * @mr_list: list of mirrored VLAN IDs to be removed + * @cmd_details: pointer to command details structure or NULL + * @rule_used: Number of rules used in internal switch + * @rule_free: Number of rules free in internal switch + * + * Delete a mirror rule. Mirror rules are supported for VEBs/VEPA elements only + **/ +i40e_status i40e_aq_delete_mirrorrule(struct i40e_hw *hw, u16 sw_seid, + u16 rule_type, u16 rule_id, u16 count, __le16 *mr_list, + struct i40e_asq_cmd_details *cmd_details, + u16 *rules_used, u16 *rules_free) +{ + /* Rule ID has to be valid except rule_type: INGRESS VLAN mirroring */ + if (rule_type != I40E_AQC_MIRROR_RULE_TYPE_VLAN) { + if (!rule_id) + return I40E_ERR_PARAM; + } else { + /* count and mr_list shall be valid for rule_type INGRESS VLAN + * mirroring. For other rule_type, count and rule_type should + * not matter. + */ + if (count == 0 || !mr_list) + return I40E_ERR_PARAM; + } + + return i40e_mirrorrule_op(hw, i40e_aqc_opc_delete_mirror_rule, sw_seid, + rule_type, rule_id, count, mr_list, + cmd_details, NULL, rules_used, rules_free); +} + /** * i40e_aq_send_msg_to_vf * @hw: pointer to the hardware structure @@ -2765,35 +2967,6 @@ i40e_aq_erase_nvm_exit: return status; } -#define I40E_DEV_FUNC_CAP_SWITCH_MODE 0x01 -#define I40E_DEV_FUNC_CAP_MGMT_MODE 0x02 -#define I40E_DEV_FUNC_CAP_NPAR 0x03 -#define I40E_DEV_FUNC_CAP_OS2BMC 0x04 -#define I40E_DEV_FUNC_CAP_VALID_FUNC 0x05 -#define I40E_DEV_FUNC_CAP_SRIOV_1_1 0x12 -#define I40E_DEV_FUNC_CAP_VF 0x13 -#define I40E_DEV_FUNC_CAP_VMDQ 0x14 -#define I40E_DEV_FUNC_CAP_802_1_QBG 0x15 -#define I40E_DEV_FUNC_CAP_802_1_QBH 0x16 -#define I40E_DEV_FUNC_CAP_VSI 0x17 -#define I40E_DEV_FUNC_CAP_DCB 0x18 -#define I40E_DEV_FUNC_CAP_FCOE 0x21 -#define I40E_DEV_FUNC_CAP_ISCSI 0x22 -#define I40E_DEV_FUNC_CAP_RSS 0x40 -#define I40E_DEV_FUNC_CAP_RX_QUEUES 0x41 -#define I40E_DEV_FUNC_CAP_TX_QUEUES 0x42 -#define I40E_DEV_FUNC_CAP_MSIX 0x43 -#define I40E_DEV_FUNC_CAP_MSIX_VF 0x44 -#define I40E_DEV_FUNC_CAP_FLOW_DIRECTOR 0x45 -#define I40E_DEV_FUNC_CAP_IEEE_1588 0x46 -#define I40E_DEV_FUNC_CAP_FLEX10 0xF1 -#define I40E_DEV_FUNC_CAP_CEM 0xF2 -#define I40E_DEV_FUNC_CAP_IWARP 0x51 -#define I40E_DEV_FUNC_CAP_LED 0x61 -#define I40E_DEV_FUNC_CAP_SDP 0x62 -#define I40E_DEV_FUNC_CAP_MDIO 0x63 -#define I40E_DEV_FUNC_CAP_WR_CSR_PROT 0x64 - /** * i40e_parse_discover_capabilities * @hw: pointer to the hw struct @@ -2832,79 +3005,79 @@ static void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff, major_rev = cap->major_rev; switch (id) { - case I40E_DEV_FUNC_CAP_SWITCH_MODE: + case I40E_AQ_CAP_ID_SWITCH_MODE: p->switch_mode = number; break; - case I40E_DEV_FUNC_CAP_MGMT_MODE: + case I40E_AQ_CAP_ID_MNG_MODE: p->management_mode = number; break; - case I40E_DEV_FUNC_CAP_NPAR: + case I40E_AQ_CAP_ID_NPAR_ACTIVE: p->npar_enable = number; break; - case I40E_DEV_FUNC_CAP_OS2BMC: + case I40E_AQ_CAP_ID_OS2BMC_CAP: p->os2bmc = number; break; - case I40E_DEV_FUNC_CAP_VALID_FUNC: + case I40E_AQ_CAP_ID_FUNCTIONS_VALID: p->valid_functions = number; break; - case I40E_DEV_FUNC_CAP_SRIOV_1_1: + case I40E_AQ_CAP_ID_SRIOV: if (number == 1) p->sr_iov_1_1 = true; break; - case I40E_DEV_FUNC_CAP_VF: + case I40E_AQ_CAP_ID_VF: p->num_vfs = number; p->vf_base_id = logical_id; break; - case I40E_DEV_FUNC_CAP_VMDQ: + case I40E_AQ_CAP_ID_VMDQ: if (number == 1) p->vmdq = true; break; - case I40E_DEV_FUNC_CAP_802_1_QBG: + case I40E_AQ_CAP_ID_8021QBG: if (number == 1) p->evb_802_1_qbg = true; break; - case I40E_DEV_FUNC_CAP_802_1_QBH: + case I40E_AQ_CAP_ID_8021QBR: if (number == 1) p->evb_802_1_qbh = true; break; - case I40E_DEV_FUNC_CAP_VSI: + case I40E_AQ_CAP_ID_VSI: p->num_vsis = number; break; - case I40E_DEV_FUNC_CAP_DCB: + case I40E_AQ_CAP_ID_DCB: if (number == 1) { p->dcb = true; p->enabled_tcmap = logical_id; p->maxtc = phys_id; } break; - case I40E_DEV_FUNC_CAP_FCOE: + case I40E_AQ_CAP_ID_FCOE: if (number == 1) p->fcoe = true; break; - case I40E_DEV_FUNC_CAP_ISCSI: + case I40E_AQ_CAP_ID_ISCSI: if (number == 1) p->iscsi = true; break; - case I40E_DEV_FUNC_CAP_RSS: + case I40E_AQ_CAP_ID_RSS: p->rss = true; p->rss_table_size = number; p->rss_table_entry_width = logical_id; break; - case I40E_DEV_FUNC_CAP_RX_QUEUES: + case I40E_AQ_CAP_ID_RXQ: p->num_rx_qp = number; p->base_queue = phys_id; break; - case I40E_DEV_FUNC_CAP_TX_QUEUES: + case I40E_AQ_CAP_ID_TXQ: p->num_tx_qp = number; p->base_queue = phys_id; break; - case I40E_DEV_FUNC_CAP_MSIX: + case I40E_AQ_CAP_ID_MSIX: p->num_msix_vectors = number; break; - case I40E_DEV_FUNC_CAP_MSIX_VF: + case I40E_AQ_CAP_ID_VF_MSIX: p->num_msix_vectors_vf = number; break; - case I40E_DEV_FUNC_CAP_FLEX10: + case I40E_AQ_CAP_ID_FLEX10: if (major_rev == 1) { if (number == 1) { p->flex10_enable = true; @@ -2920,38 +3093,38 @@ static void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff, p->flex10_mode = logical_id; p->flex10_status = phys_id; break; - case I40E_DEV_FUNC_CAP_CEM: + case I40E_AQ_CAP_ID_CEM: if (number == 1) p->mgmt_cem = true; break; - case I40E_DEV_FUNC_CAP_IWARP: + case I40E_AQ_CAP_ID_IWARP: if (number == 1) p->iwarp = true; break; - case I40E_DEV_FUNC_CAP_LED: + case I40E_AQ_CAP_ID_LED: if (phys_id < I40E_HW_CAP_MAX_GPIO) p->led[phys_id] = true; break; - case I40E_DEV_FUNC_CAP_SDP: + case I40E_AQ_CAP_ID_SDP: if (phys_id < I40E_HW_CAP_MAX_GPIO) p->sdp[phys_id] = true; break; - case I40E_DEV_FUNC_CAP_MDIO: + case I40E_AQ_CAP_ID_MDIO: if (number == 1) { p->mdio_port_num = phys_id; p->mdio_port_mode = logical_id; } break; - case I40E_DEV_FUNC_CAP_IEEE_1588: + case I40E_AQ_CAP_ID_1588: if (number == 1) p->ieee_1588 = true; break; - case I40E_DEV_FUNC_CAP_FLOW_DIRECTOR: + case I40E_AQ_CAP_ID_FLOW_DIRECTOR: p->fd = true; p->fd_filters_guaranteed = number; p->fd_filters_best_effort = logical_id; break; - case I40E_DEV_FUNC_CAP_WR_CSR_PROT: + case I40E_AQ_CAP_ID_WSR_PROT: p->wr_csr_prot = (u64)number; p->wr_csr_prot |= (u64)logical_id << 32; break; @@ -3709,7 +3882,7 @@ i40e_status i40e_set_filter_control(struct i40e_hw *hw, return ret; /* Read the PF Queue Filter control register */ - val = rd32(hw, I40E_PFQF_CTL_0); + val = i40e_read_rx_ctl(hw, I40E_PFQF_CTL_0); /* Program required PE hash buckets for the PF */ val &= ~I40E_PFQF_CTL_0_PEHSIZE_MASK; @@ -3746,7 +3919,7 @@ i40e_status i40e_set_filter_control(struct i40e_hw *hw, if (settings->enable_macvlan) val |= I40E_PFQF_CTL_0_MACVLAN_ENA_MASK; - wr32(hw, I40E_PFQF_CTL_0, val); + i40e_write_rx_ctl(hw, I40E_PFQF_CTL_0, val); return 0; } @@ -4073,3 +4246,454 @@ i40e_status i40e_aq_configure_partition_bw(struct i40e_hw *hw, return status; } + +/** + * i40e_read_phy_register + * @hw: pointer to the HW structure + * @page: registers page number + * @reg: register address in the page + * @phy_adr: PHY address on MDIO interface + * @value: PHY register value + * + * Reads specified PHY register value + **/ +i40e_status i40e_read_phy_register(struct i40e_hw *hw, + u8 page, u16 reg, u8 phy_addr, + u16 *value) +{ + i40e_status status = I40E_ERR_TIMEOUT; + u32 command = 0; + u16 retry = 1000; + u8 port_num = hw->func_caps.mdio_port_num; + + command = (reg << I40E_GLGEN_MSCA_MDIADD_SHIFT) | + (page << I40E_GLGEN_MSCA_DEVADD_SHIFT) | + (phy_addr << I40E_GLGEN_MSCA_PHYADD_SHIFT) | + (I40E_MDIO_OPCODE_ADDRESS) | + (I40E_MDIO_STCODE) | + (I40E_GLGEN_MSCA_MDICMD_MASK) | + (I40E_GLGEN_MSCA_MDIINPROGEN_MASK); + wr32(hw, I40E_GLGEN_MSCA(port_num), command); + do { + command = rd32(hw, I40E_GLGEN_MSCA(port_num)); + if (!(command & I40E_GLGEN_MSCA_MDICMD_MASK)) { + status = 0; + break; + } + usleep_range(10, 20); + retry--; + } while (retry); + + if (status) { + i40e_debug(hw, I40E_DEBUG_PHY, + "PHY: Can't write command to external PHY.\n"); + goto phy_read_end; + } + + command = (page << I40E_GLGEN_MSCA_DEVADD_SHIFT) | + (phy_addr << I40E_GLGEN_MSCA_PHYADD_SHIFT) | + (I40E_MDIO_OPCODE_READ) | + (I40E_MDIO_STCODE) | + (I40E_GLGEN_MSCA_MDICMD_MASK) | + (I40E_GLGEN_MSCA_MDIINPROGEN_MASK); + status = I40E_ERR_TIMEOUT; + retry = 1000; + wr32(hw, I40E_GLGEN_MSCA(port_num), command); + do { + command = rd32(hw, I40E_GLGEN_MSCA(port_num)); + if (!(command & I40E_GLGEN_MSCA_MDICMD_MASK)) { + status = 0; + break; + } + usleep_range(10, 20); + retry--; + } while (retry); + + if (!status) { + command = rd32(hw, I40E_GLGEN_MSRWD(port_num)); + *value = (command & I40E_GLGEN_MSRWD_MDIRDDATA_MASK) >> + I40E_GLGEN_MSRWD_MDIRDDATA_SHIFT; + } else { + i40e_debug(hw, I40E_DEBUG_PHY, + "PHY: Can't read register value from external PHY.\n"); + } + +phy_read_end: + return status; +} + +/** + * i40e_write_phy_register + * @hw: pointer to the HW structure + * @page: registers page number + * @reg: register address in the page + * @phy_adr: PHY address on MDIO interface + * @value: PHY register value + * + * Writes value to specified PHY register + **/ +i40e_status i40e_write_phy_register(struct i40e_hw *hw, + u8 page, u16 reg, u8 phy_addr, + u16 value) +{ + i40e_status status = I40E_ERR_TIMEOUT; + u32 command = 0; + u16 retry = 1000; + u8 port_num = hw->func_caps.mdio_port_num; + + command = (reg << I40E_GLGEN_MSCA_MDIADD_SHIFT) | + (page << I40E_GLGEN_MSCA_DEVADD_SHIFT) | + (phy_addr << I40E_GLGEN_MSCA_PHYADD_SHIFT) | + (I40E_MDIO_OPCODE_ADDRESS) | + (I40E_MDIO_STCODE) | + (I40E_GLGEN_MSCA_MDICMD_MASK) | + (I40E_GLGEN_MSCA_MDIINPROGEN_MASK); + wr32(hw, I40E_GLGEN_MSCA(port_num), command); + do { + command = rd32(hw, I40E_GLGEN_MSCA(port_num)); + if (!(command & I40E_GLGEN_MSCA_MDICMD_MASK)) { + status = 0; + break; + } + usleep_range(10, 20); + retry--; + } while (retry); + if (status) { + i40e_debug(hw, I40E_DEBUG_PHY, + "PHY: Can't write command to external PHY.\n"); + goto phy_write_end; + } + + command = value << I40E_GLGEN_MSRWD_MDIWRDATA_SHIFT; + wr32(hw, I40E_GLGEN_MSRWD(port_num), command); + + command = (page << I40E_GLGEN_MSCA_DEVADD_SHIFT) | + (phy_addr << I40E_GLGEN_MSCA_PHYADD_SHIFT) | + (I40E_MDIO_OPCODE_WRITE) | + (I40E_MDIO_STCODE) | + (I40E_GLGEN_MSCA_MDICMD_MASK) | + (I40E_GLGEN_MSCA_MDIINPROGEN_MASK); + status = I40E_ERR_TIMEOUT; + retry = 1000; + wr32(hw, I40E_GLGEN_MSCA(port_num), command); + do { + command = rd32(hw, I40E_GLGEN_MSCA(port_num)); + if (!(command & I40E_GLGEN_MSCA_MDICMD_MASK)) { + status = 0; + break; + } + usleep_range(10, 20); + retry--; + } while (retry); + +phy_write_end: + return status; +} + +/** + * i40e_get_phy_address + * @hw: pointer to the HW structure + * @dev_num: PHY port num that address we want + * @phy_addr: Returned PHY address + * + * Gets PHY address for current port + **/ +u8 i40e_get_phy_address(struct i40e_hw *hw, u8 dev_num) +{ + u8 port_num = hw->func_caps.mdio_port_num; + u32 reg_val = rd32(hw, I40E_GLGEN_MDIO_I2C_SEL(port_num)); + + return (u8)(reg_val >> ((dev_num + 1) * 5)) & 0x1f; +} + +/** + * i40e_blink_phy_led + * @hw: pointer to the HW structure + * @time: time how long led will blinks in secs + * @interval: gap between LED on and off in msecs + * + * Blinks PHY link LED + **/ +i40e_status i40e_blink_phy_link_led(struct i40e_hw *hw, + u32 time, u32 interval) +{ + i40e_status status = 0; + u32 i; + u16 led_ctl; + u16 gpio_led_port; + u16 led_reg; + u16 led_addr = I40E_PHY_LED_PROV_REG_1; + u8 phy_addr = 0; + u8 port_num; + + i = rd32(hw, I40E_PFGEN_PORTNUM); + port_num = (u8)(i & I40E_PFGEN_PORTNUM_PORT_NUM_MASK); + phy_addr = i40e_get_phy_address(hw, port_num); + + for (gpio_led_port = 0; gpio_led_port < 3; gpio_led_port++, + led_addr++) { + status = i40e_read_phy_register(hw, I40E_PHY_COM_REG_PAGE, + led_addr, phy_addr, &led_reg); + if (status) + goto phy_blinking_end; + led_ctl = led_reg; + if (led_reg & I40E_PHY_LED_LINK_MODE_MASK) { + led_reg = 0; + status = i40e_write_phy_register(hw, + I40E_PHY_COM_REG_PAGE, + led_addr, phy_addr, + led_reg); + if (status) + goto phy_blinking_end; + break; + } + } + + if (time > 0 && interval > 0) { + for (i = 0; i < time * 1000; i += interval) { + status = i40e_read_phy_register(hw, + I40E_PHY_COM_REG_PAGE, + led_addr, phy_addr, + &led_reg); + if (status) + goto restore_config; + if (led_reg & I40E_PHY_LED_MANUAL_ON) + led_reg = 0; + else + led_reg = I40E_PHY_LED_MANUAL_ON; + status = i40e_write_phy_register(hw, + I40E_PHY_COM_REG_PAGE, + led_addr, phy_addr, + led_reg); + if (status) + goto restore_config; + msleep(interval); + } + } + +restore_config: + status = i40e_write_phy_register(hw, I40E_PHY_COM_REG_PAGE, led_addr, + phy_addr, led_ctl); + +phy_blinking_end: + return status; +} + +/** + * i40e_led_get_phy - return current on/off mode + * @hw: pointer to the hw struct + * @led_addr: address of led register to use + * @val: original value of register to use + * + **/ +i40e_status i40e_led_get_phy(struct i40e_hw *hw, u16 *led_addr, + u16 *val) +{ + i40e_status status = 0; + u16 gpio_led_port; + u8 phy_addr = 0; + u16 reg_val; + u16 temp_addr; + u8 port_num; + u32 i; + + temp_addr = I40E_PHY_LED_PROV_REG_1; + i = rd32(hw, I40E_PFGEN_PORTNUM); + port_num = (u8)(i & I40E_PFGEN_PORTNUM_PORT_NUM_MASK); + phy_addr = i40e_get_phy_address(hw, port_num); + + for (gpio_led_port = 0; gpio_led_port < 3; gpio_led_port++, + temp_addr++) { + status = i40e_read_phy_register(hw, I40E_PHY_COM_REG_PAGE, + temp_addr, phy_addr, ®_val); + if (status) + return status; + *val = reg_val; + if (reg_val & I40E_PHY_LED_LINK_MODE_MASK) { + *led_addr = temp_addr; + break; + } + } + return status; +} + +/** + * i40e_led_set_phy + * @hw: pointer to the HW structure + * @on: true or false + * @mode: original val plus bit for set or ignore + * Set led's on or off when controlled by the PHY + * + **/ +i40e_status i40e_led_set_phy(struct i40e_hw *hw, bool on, + u16 led_addr, u32 mode) +{ + i40e_status status = 0; + u16 led_ctl = 0; + u16 led_reg = 0; + u8 phy_addr = 0; + u8 port_num; + u32 i; + + i = rd32(hw, I40E_PFGEN_PORTNUM); + port_num = (u8)(i & I40E_PFGEN_PORTNUM_PORT_NUM_MASK); + phy_addr = i40e_get_phy_address(hw, port_num); + + status = i40e_read_phy_register(hw, I40E_PHY_COM_REG_PAGE, led_addr, + phy_addr, &led_reg); + if (status) + return status; + led_ctl = led_reg; + if (led_reg & I40E_PHY_LED_LINK_MODE_MASK) { + led_reg = 0; + status = i40e_write_phy_register(hw, I40E_PHY_COM_REG_PAGE, + led_addr, phy_addr, led_reg); + if (status) + return status; + } + status = i40e_read_phy_register(hw, I40E_PHY_COM_REG_PAGE, + led_addr, phy_addr, &led_reg); + if (status) + goto restore_config; + if (on) + led_reg = I40E_PHY_LED_MANUAL_ON; + else + led_reg = 0; + status = i40e_write_phy_register(hw, I40E_PHY_COM_REG_PAGE, + led_addr, phy_addr, led_reg); + if (status) + goto restore_config; + if (mode & I40E_PHY_LED_MODE_ORIG) { + led_ctl = (mode & I40E_PHY_LED_MODE_MASK); + status = i40e_write_phy_register(hw, + I40E_PHY_COM_REG_PAGE, + led_addr, phy_addr, led_ctl); + } + return status; +restore_config: + status = i40e_write_phy_register(hw, I40E_PHY_COM_REG_PAGE, led_addr, + phy_addr, led_ctl); + return status; +} + +/** + * i40e_aq_rx_ctl_read_register - use FW to read from an Rx control register + * @hw: pointer to the hw struct + * @reg_addr: register address + * @reg_val: ptr to register value + * @cmd_details: pointer to command details structure or NULL + * + * Use the firmware to read the Rx control register, + * especially useful if the Rx unit is under heavy pressure + **/ +i40e_status i40e_aq_rx_ctl_read_register(struct i40e_hw *hw, + u32 reg_addr, u32 *reg_val, + struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_rx_ctl_reg_read_write *cmd_resp = + (struct i40e_aqc_rx_ctl_reg_read_write *)&desc.params.raw; + i40e_status status; + + if (!reg_val) + return I40E_ERR_PARAM; + + i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_rx_ctl_reg_read); + + cmd_resp->address = cpu_to_le32(reg_addr); + + status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); + + if (status == 0) + *reg_val = le32_to_cpu(cmd_resp->value); + + return status; +} + +/** + * i40e_read_rx_ctl - read from an Rx control register + * @hw: pointer to the hw struct + * @reg_addr: register address + **/ +u32 i40e_read_rx_ctl(struct i40e_hw *hw, u32 reg_addr) +{ + i40e_status status = 0; + bool use_register; + int retry = 5; + u32 val = 0; + + use_register = (hw->aq.api_maj_ver == 1) && (hw->aq.api_min_ver < 5); + if (!use_register) { +do_retry: + status = i40e_aq_rx_ctl_read_register(hw, reg_addr, &val, NULL); + if (hw->aq.asq_last_status == I40E_AQ_RC_EAGAIN && retry) { + usleep_range(1000, 2000); + retry--; + goto do_retry; + } + } + + /* if the AQ access failed, try the old-fashioned way */ + if (status || use_register) + val = rd32(hw, reg_addr); + + return val; +} + +/** + * i40e_aq_rx_ctl_write_register + * @hw: pointer to the hw struct + * @reg_addr: register address + * @reg_val: register value + * @cmd_details: pointer to command details structure or NULL + * + * Use the firmware to write to an Rx control register, + * especially useful if the Rx unit is under heavy pressure + **/ +i40e_status i40e_aq_rx_ctl_write_register(struct i40e_hw *hw, + u32 reg_addr, u32 reg_val, + struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_rx_ctl_reg_read_write *cmd = + (struct i40e_aqc_rx_ctl_reg_read_write *)&desc.params.raw; + i40e_status status; + + i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_rx_ctl_reg_write); + + cmd->address = cpu_to_le32(reg_addr); + cmd->value = cpu_to_le32(reg_val); + + status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); + + return status; +} + +/** + * i40e_write_rx_ctl - write to an Rx control register + * @hw: pointer to the hw struct + * @reg_addr: register address + * @reg_val: register value + **/ +void i40e_write_rx_ctl(struct i40e_hw *hw, u32 reg_addr, u32 reg_val) +{ + i40e_status status = 0; + bool use_register; + int retry = 5; + + use_register = (hw->aq.api_maj_ver == 1) && (hw->aq.api_min_ver < 5); + if (!use_register) { +do_retry: + status = i40e_aq_rx_ctl_write_register(hw, reg_addr, + reg_val, NULL); + if (hw->aq.asq_last_status == I40E_AQ_RC_EAGAIN && retry) { + usleep_range(1000, 2000); + retry--; + goto do_retry; + } + } + + /* if the AQ access failed, try the old-fashioned way */ + if (status || use_register) + wr32(hw, reg_addr, reg_val); +} diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb.c b/drivers/net/ethernet/intel/i40e/i40e_dcb.c index 2691277c0..0fab3a9b5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_dcb.c +++ b/drivers/net/ethernet/intel/i40e/i40e_dcb.c @@ -380,17 +380,20 @@ static void i40e_parse_cee_app_tlv(struct i40e_cee_feat_tlv *tlv, { u16 length, typelength, offset = 0; struct i40e_cee_app_prio *app; - u8 i, up, selector; + u8 i; typelength = ntohs(tlv->hdr.typelen); length = (u16)((typelength & I40E_LLDP_TLV_LEN_MASK) >> I40E_LLDP_TLV_LEN_SHIFT); dcbcfg->numapps = length / sizeof(*app); + if (!dcbcfg->numapps) return; for (i = 0; i < dcbcfg->numapps; i++) { + u8 up, selector; + app = (struct i40e_cee_app_prio *)(tlv->tlvinfo + offset); for (up = 0; up < I40E_MAX_USER_PRIORITY; up++) { if (app->prio_map & BIT(up)) @@ -400,13 +403,17 @@ static void i40e_parse_cee_app_tlv(struct i40e_cee_feat_tlv *tlv, /* Get Selector from lower 2 bits, and convert to IEEE */ selector = (app->upper_oui_sel & I40E_CEE_APP_SELECTOR_MASK); - if (selector == I40E_CEE_APP_SEL_ETHTYPE) + switch (selector) { + case I40E_CEE_APP_SEL_ETHTYPE: dcbcfg->app[i].selector = I40E_APP_SEL_ETHTYPE; - else if (selector == I40E_CEE_APP_SEL_TCPIP) + break; + case I40E_CEE_APP_SEL_TCPIP: dcbcfg->app[i].selector = I40E_APP_SEL_TCPIP; - else + break; + default: /* Keep selector as it is for unknown types */ dcbcfg->app[i].selector = selector; + } dcbcfg->app[i].protocolid = ntohs(app->protocol); /* Move to next app */ @@ -814,13 +821,15 @@ i40e_status i40e_get_dcb_config(struct i40e_hw *hw) struct i40e_aqc_get_cee_dcb_cfg_resp cee_cfg; struct i40e_aqc_get_cee_dcb_cfg_v1_resp cee_v1_cfg; - /* If Firmware version < v4.33 IEEE only */ - if (((hw->aq.fw_maj_ver == 4) && (hw->aq.fw_min_ver < 33)) || - (hw->aq.fw_maj_ver < 4)) + /* If Firmware version < v4.33 on X710/XL710, IEEE only */ + if ((hw->mac.type == I40E_MAC_XL710) && + (((hw->aq.fw_maj_ver == 4) && (hw->aq.fw_min_ver < 33)) || + (hw->aq.fw_maj_ver < 4))) return i40e_get_ieee_dcb_config(hw); - /* If Firmware version == v4.33 use old CEE struct */ - if ((hw->aq.fw_maj_ver == 4) && (hw->aq.fw_min_ver == 33)) { + /* If Firmware version == v4.33 on X710/XL710, use old CEE struct */ + if ((hw->mac.type == I40E_MAC_XL710) && + ((hw->aq.fw_maj_ver == 4) && (hw->aq.fw_min_ver == 33))) { ret = i40e_aq_get_cee_dcb_config(hw, &cee_v1_cfg, sizeof(cee_v1_cfg), NULL); if (!ret) { diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 10744a698..0c97733d2 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2014 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -61,256 +61,12 @@ static struct i40e_veb *i40e_dbg_find_veb(struct i40e_pf *pf, int seid) { int i; - if ((seid < I40E_BASE_VEB_SEID) || - (seid > (I40E_BASE_VEB_SEID + I40E_MAX_VEB))) - dev_info(&pf->pdev->dev, "%d: bad seid\n", seid); - else - for (i = 0; i < I40E_MAX_VEB; i++) - if (pf->veb[i] && pf->veb[i]->seid == seid) - return pf->veb[i]; + for (i = 0; i < I40E_MAX_VEB; i++) + if (pf->veb[i] && pf->veb[i]->seid == seid) + return pf->veb[i]; return NULL; } -/************************************************************** - * dump - * The dump entry in debugfs is for getting a data snapshow of - * the driver's current configuration and runtime details. - * When the filesystem entry is written, a snapshot is taken. - * When the entry is read, the most recent snapshot data is dumped. - **************************************************************/ -static char *i40e_dbg_dump_buf; -static ssize_t i40e_dbg_dump_data_len; -static ssize_t i40e_dbg_dump_buffer_len; - -/** - * i40e_dbg_dump_read - read the dump data - * @filp: the opened file - * @buffer: where to write the data for the user to read - * @count: the size of the user's buffer - * @ppos: file position offset - **/ -static ssize_t i40e_dbg_dump_read(struct file *filp, char __user *buffer, - size_t count, loff_t *ppos) -{ - int bytes_not_copied; - int len; - - /* is *ppos bigger than the available data? */ - if (*ppos >= i40e_dbg_dump_data_len || !i40e_dbg_dump_buf) - return 0; - - /* be sure to not read beyond the end of available data */ - len = min_t(int, count, (i40e_dbg_dump_data_len - *ppos)); - - bytes_not_copied = copy_to_user(buffer, &i40e_dbg_dump_buf[*ppos], len); - if (bytes_not_copied) - return -EFAULT; - - *ppos += len; - return len; -} - -/** - * i40e_dbg_prep_dump_buf - * @pf: the PF we're working with - * @buflen: the desired buffer length - * - * Return positive if success, 0 if failed - **/ -static int i40e_dbg_prep_dump_buf(struct i40e_pf *pf, int buflen) -{ - /* if not already big enough, prep for re alloc */ - if (i40e_dbg_dump_buffer_len && i40e_dbg_dump_buffer_len < buflen) { - kfree(i40e_dbg_dump_buf); - i40e_dbg_dump_buffer_len = 0; - i40e_dbg_dump_buf = NULL; - } - - /* get a new buffer if needed */ - if (!i40e_dbg_dump_buf) { - i40e_dbg_dump_buf = kzalloc(buflen, GFP_KERNEL); - if (i40e_dbg_dump_buf != NULL) - i40e_dbg_dump_buffer_len = buflen; - } - - return i40e_dbg_dump_buffer_len; -} - -/** - * i40e_dbg_dump_write - trigger a datadump snapshot - * @filp: the opened file - * @buffer: where to find the user's data - * @count: the length of the user's data - * @ppos: file position offset - * - * Any write clears the stats - **/ -static ssize_t i40e_dbg_dump_write(struct file *filp, - const char __user *buffer, - size_t count, loff_t *ppos) -{ - struct i40e_pf *pf = filp->private_data; - bool seid_found = false; - long seid = -1; - int buflen = 0; - int i, ret; - int len; - u8 *p; - - /* don't allow partial writes */ - if (*ppos != 0) - return 0; - - /* decode the SEID given to be dumped */ - ret = kstrtol_from_user(buffer, count, 0, &seid); - - if (ret) { - dev_info(&pf->pdev->dev, "bad seid value\n"); - } else if (seid == 0) { - seid_found = true; - - kfree(i40e_dbg_dump_buf); - i40e_dbg_dump_buffer_len = 0; - i40e_dbg_dump_data_len = 0; - i40e_dbg_dump_buf = NULL; - dev_info(&pf->pdev->dev, "debug buffer freed\n"); - - } else if (seid == pf->pf_seid || seid == 1) { - seid_found = true; - - buflen = sizeof(struct i40e_pf); - buflen += (sizeof(struct i40e_aq_desc) - * (pf->hw.aq.num_arq_entries + pf->hw.aq.num_asq_entries)); - - if (i40e_dbg_prep_dump_buf(pf, buflen)) { - p = i40e_dbg_dump_buf; - - len = sizeof(struct i40e_pf); - memcpy(p, pf, len); - p += len; - - len = (sizeof(struct i40e_aq_desc) - * pf->hw.aq.num_asq_entries); - memcpy(p, pf->hw.aq.asq.desc_buf.va, len); - p += len; - - len = (sizeof(struct i40e_aq_desc) - * pf->hw.aq.num_arq_entries); - memcpy(p, pf->hw.aq.arq.desc_buf.va, len); - p += len; - - i40e_dbg_dump_data_len = buflen; - dev_info(&pf->pdev->dev, - "PF seid %ld dumped %d bytes\n", - seid, (int)i40e_dbg_dump_data_len); - } - } else if (seid >= I40E_BASE_VSI_SEID) { - struct i40e_vsi *vsi = NULL; - struct i40e_mac_filter *f; - int filter_count = 0; - - mutex_lock(&pf->switch_mutex); - vsi = i40e_dbg_find_vsi(pf, seid); - if (!vsi) { - mutex_unlock(&pf->switch_mutex); - goto write_exit; - } - - buflen = sizeof(struct i40e_vsi); - buflen += sizeof(struct i40e_q_vector) * vsi->num_q_vectors; - buflen += sizeof(struct i40e_ring) * 2 * vsi->num_queue_pairs; - buflen += sizeof(struct i40e_tx_buffer) * vsi->num_queue_pairs; - buflen += sizeof(struct i40e_rx_buffer) * vsi->num_queue_pairs; - list_for_each_entry(f, &vsi->mac_filter_list, list) - filter_count++; - buflen += sizeof(struct i40e_mac_filter) * filter_count; - - if (i40e_dbg_prep_dump_buf(pf, buflen)) { - p = i40e_dbg_dump_buf; - seid_found = true; - - len = sizeof(struct i40e_vsi); - memcpy(p, vsi, len); - p += len; - - if (vsi->num_q_vectors) { - len = (sizeof(struct i40e_q_vector) - * vsi->num_q_vectors); - memcpy(p, vsi->q_vectors, len); - p += len; - } - - if (vsi->num_queue_pairs) { - len = (sizeof(struct i40e_ring) * - vsi->num_queue_pairs); - memcpy(p, vsi->tx_rings, len); - p += len; - memcpy(p, vsi->rx_rings, len); - p += len; - } - - if (vsi->tx_rings[0]) { - len = sizeof(struct i40e_tx_buffer); - for (i = 0; i < vsi->num_queue_pairs; i++) { - memcpy(p, vsi->tx_rings[i]->tx_bi, len); - p += len; - } - len = sizeof(struct i40e_rx_buffer); - for (i = 0; i < vsi->num_queue_pairs; i++) { - memcpy(p, vsi->rx_rings[i]->rx_bi, len); - p += len; - } - } - - /* macvlan filter list */ - len = sizeof(struct i40e_mac_filter); - list_for_each_entry(f, &vsi->mac_filter_list, list) { - memcpy(p, f, len); - p += len; - } - - i40e_dbg_dump_data_len = buflen; - dev_info(&pf->pdev->dev, - "VSI seid %ld dumped %d bytes\n", - seid, (int)i40e_dbg_dump_data_len); - } - mutex_unlock(&pf->switch_mutex); - } else if (seid >= I40E_BASE_VEB_SEID) { - struct i40e_veb *veb = NULL; - - mutex_lock(&pf->switch_mutex); - veb = i40e_dbg_find_veb(pf, seid); - if (!veb) { - mutex_unlock(&pf->switch_mutex); - goto write_exit; - } - - buflen = sizeof(struct i40e_veb); - if (i40e_dbg_prep_dump_buf(pf, buflen)) { - seid_found = true; - memcpy(i40e_dbg_dump_buf, veb, buflen); - i40e_dbg_dump_data_len = buflen; - dev_info(&pf->pdev->dev, - "VEB seid %ld dumped %d bytes\n", - seid, (int)i40e_dbg_dump_data_len); - } - mutex_unlock(&pf->switch_mutex); - } - -write_exit: - if (!seid_found) - dev_info(&pf->pdev->dev, "unknown seid %ld\n", seid); - - return count; -} - -static const struct file_operations i40e_dbg_dump_fops = { - .owner = THIS_MODULE, - .open = simple_open, - .read = i40e_dbg_dump_read, - .write = i40e_dbg_dump_write, -}; - /************************************************************** * command * The command entry in debugfs is for giving the driver commands @@ -379,19 +135,27 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) return; } dev_info(&pf->pdev->dev, "vsi seid %d\n", seid); - if (vsi->netdev) - dev_info(&pf->pdev->dev, - " netdev: name = %s\n", - vsi->netdev->name); + if (vsi->netdev) { + struct net_device *nd = vsi->netdev; + + dev_info(&pf->pdev->dev, " netdev: name = %s, state = %lu, flags = 0x%08x\n", + nd->name, nd->state, nd->flags); + dev_info(&pf->pdev->dev, " features = 0x%08lx\n", + (unsigned long int)nd->features); + dev_info(&pf->pdev->dev, " hw_features = 0x%08lx\n", + (unsigned long int)nd->hw_features); + dev_info(&pf->pdev->dev, " vlan_features = 0x%08lx\n", + (unsigned long int)nd->vlan_features); + } if (vsi->active_vlans) dev_info(&pf->pdev->dev, " vlgrp: & = %p\n", vsi->active_vlans); dev_info(&pf->pdev->dev, - " netdev_registered = %i, current_netdev_flags = 0x%04x, state = %li flags = 0x%08lx\n", - vsi->netdev_registered, - vsi->current_netdev_flags, vsi->state, vsi->flags); + " state = %li flags = 0x%08lx, netdev_registered = %i, current_netdev_flags = 0x%04x\n", + vsi->state, vsi->flags, + vsi->netdev_registered, vsi->current_netdev_flags); if (vsi == pf->vsi[pf->lan_vsi]) - dev_info(&pf->pdev->dev, "MAC address: %pM SAN MAC: %pM Port MAC: %pM\n", + dev_info(&pf->pdev->dev, " MAC address: %pM SAN MAC: %pM Port MAC: %pM\n", pf->hw.mac.addr, pf->hw.mac.san_addr, pf->hw.mac.port_addr); @@ -511,7 +275,7 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) rx_ring->dtype); dev_info(&pf->pdev->dev, " rx_rings[%i]: hsplit = %d, next_to_use = %d, next_to_clean = %d, ring_active = %i\n", - i, rx_ring->hsplit, + i, ring_is_ps_enabled(rx_ring), rx_ring->next_to_use, rx_ring->next_to_clean, rx_ring->ring_active); @@ -525,6 +289,11 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) i, rx_ring->rx_stats.alloc_page_failed, rx_ring->rx_stats.alloc_buff_failed); + dev_info(&pf->pdev->dev, + " rx_rings[%i]: rx_stats: realloc_count = %lld, page_reuse_count = %lld\n", + i, + rx_ring->rx_stats.realloc_count, + rx_ring->rx_stats.page_reuse_count); dev_info(&pf->pdev->dev, " rx_rings[%i]: size = %i, dma = 0x%08lx\n", i, rx_ring->size, @@ -533,6 +302,10 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) " rx_rings[%i]: vsi = %p, q_vector = %p\n", i, rx_ring->vsi, rx_ring->q_vector); + dev_info(&pf->pdev->dev, + " rx_rings[%i]: rx_itr_setting = %d (%s)\n", + i, rx_ring->rx_itr_setting, + ITR_IS_DYNAMIC(rx_ring->rx_itr_setting) ? "dynamic" : "fixed"); } for (i = 0; i < vsi->num_queue_pairs; i++) { struct i40e_ring *tx_ring = ACCESS_ONCE(vsi->tx_rings[i]); @@ -557,8 +330,8 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) " tx_rings[%i]: dtype = %d\n", i, tx_ring->dtype); dev_info(&pf->pdev->dev, - " tx_rings[%i]: hsplit = %d, next_to_use = %d, next_to_clean = %d, ring_active = %i\n", - i, tx_ring->hsplit, + " tx_rings[%i]: next_to_use = %d, next_to_clean = %d, ring_active = %i\n", + i, tx_ring->next_to_use, tx_ring->next_to_clean, tx_ring->ring_active); @@ -583,14 +356,15 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) dev_info(&pf->pdev->dev, " tx_rings[%i]: DCB tc = %d\n", i, tx_ring->dcb_tc); + dev_info(&pf->pdev->dev, + " tx_rings[%i]: tx_itr_setting = %d (%s)\n", + i, tx_ring->tx_itr_setting, + ITR_IS_DYNAMIC(tx_ring->tx_itr_setting) ? "dynamic" : "fixed"); } rcu_read_unlock(); dev_info(&pf->pdev->dev, - " work_limit = %d, rx_itr_setting = %d (%s), tx_itr_setting = %d (%s)\n", - vsi->work_limit, vsi->rx_itr_setting, - ITR_IS_DYNAMIC(vsi->rx_itr_setting) ? "dynamic" : "fixed", - vsi->tx_itr_setting, - ITR_IS_DYNAMIC(vsi->tx_itr_setting) ? "dynamic" : "fixed"); + " work_limit = %d\n", + vsi->work_limit); dev_info(&pf->pdev->dev, " max_frame = %d, rx_hdr_len = %d, rx_buf_len = %d dtype = %d\n", vsi->max_frame, vsi->rx_hdr_len, vsi->rx_buf_len, vsi->dtype); @@ -815,20 +589,20 @@ static void i40e_dbg_dump_desc(int cnt, int vsi_seid, int ring_id, int desc_n, if (!is_rx_ring) { txd = I40E_TX_DESC(ring, i); dev_info(&pf->pdev->dev, - " d[%03i] = 0x%016llx 0x%016llx\n", + " d[%03x] = 0x%016llx 0x%016llx\n", i, txd->buffer_addr, txd->cmd_type_offset_bsz); } else if (sizeof(union i40e_rx_desc) == sizeof(union i40e_16byte_rx_desc)) { rxd = I40E_RX_DESC(ring, i); dev_info(&pf->pdev->dev, - " d[%03i] = 0x%016llx 0x%016llx\n", + " d[%03x] = 0x%016llx 0x%016llx\n", i, rxd->read.pkt_addr, rxd->read.hdr_addr); } else { rxd = I40E_RX_DESC(ring, i); dev_info(&pf->pdev->dev, - " d[%03i] = 0x%016llx 0x%016llx 0x%016llx 0x%016llx\n", + " d[%03x] = 0x%016llx 0x%016llx 0x%016llx 0x%016llx\n", i, rxd->read.pkt_addr, rxd->read.hdr_addr, rxd->read.rsvd1, rxd->read.rsvd2); @@ -843,20 +617,20 @@ static void i40e_dbg_dump_desc(int cnt, int vsi_seid, int ring_id, int desc_n, if (!is_rx_ring) { txd = I40E_TX_DESC(ring, desc_n); dev_info(&pf->pdev->dev, - "vsi = %02i tx ring = %02i d[%03i] = 0x%016llx 0x%016llx\n", + "vsi = %02i tx ring = %02i d[%03x] = 0x%016llx 0x%016llx\n", vsi_seid, ring_id, desc_n, txd->buffer_addr, txd->cmd_type_offset_bsz); } else if (sizeof(union i40e_rx_desc) == sizeof(union i40e_16byte_rx_desc)) { rxd = I40E_RX_DESC(ring, desc_n); dev_info(&pf->pdev->dev, - "vsi = %02i rx ring = %02i d[%03i] = 0x%016llx 0x%016llx\n", + "vsi = %02i rx ring = %02i d[%03x] = 0x%016llx 0x%016llx\n", vsi_seid, ring_id, desc_n, rxd->read.pkt_addr, rxd->read.hdr_addr); } else { rxd = I40E_RX_DESC(ring, desc_n); dev_info(&pf->pdev->dev, - "vsi = %02i rx ring = %02i d[%03i] = 0x%016llx 0x%016llx 0x%016llx 0x%016llx\n", + "vsi = %02i rx ring = %02i d[%03x] = 0x%016llx 0x%016llx 0x%016llx 0x%016llx\n", vsi_seid, ring_id, desc_n, rxd->read.pkt_addr, rxd->read.hdr_addr, rxd->read.rsvd1, rxd->read.rsvd2); @@ -918,12 +692,6 @@ static void i40e_dbg_dump_veb_seid(struct i40e_pf *pf, int seid) { struct i40e_veb *veb; - if ((seid < I40E_BASE_VEB_SEID) || - (seid >= (I40E_MAX_VEB + I40E_BASE_VEB_SEID))) { - dev_info(&pf->pdev->dev, "%d: bad seid\n", seid); - return; - } - veb = i40e_dbg_find_veb(pf, seid); if (!veb) { dev_info(&pf->pdev->dev, "can't find veb %d\n", seid); @@ -2202,11 +1970,6 @@ void i40e_dbg_pf_init(struct i40e_pf *pf) if (!pfile) goto create_failed; - pfile = debugfs_create_file("dump", 0600, pf->i40e_dbg_pf, pf, - &i40e_dbg_dump_fops); - if (!pfile) - goto create_failed; - pfile = debugfs_create_file("netdev_ops", 0600, pf->i40e_dbg_pf, pf, &i40e_dbg_netdev_ops_fops); if (!pfile) @@ -2227,9 +1990,6 @@ void i40e_dbg_pf_exit(struct i40e_pf *pf) { debugfs_remove_recursive(pf->i40e_dbg_pf); pf->i40e_dbg_pf = NULL; - - kfree(i40e_dbg_dump_buf); - i40e_dbg_dump_buf = NULL; } /** diff --git a/drivers/net/ethernet/intel/i40e/i40e_devids.h b/drivers/net/ethernet/intel/i40e/i40e_devids.h index 448ef4c17..99257fcd1 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_devids.h +++ b/drivers/net/ethernet/intel/i40e/i40e_devids.h @@ -39,13 +39,11 @@ #define I40E_DEV_ID_20G_KR2 0x1587 #define I40E_DEV_ID_20G_KR2_A 0x1588 #define I40E_DEV_ID_10G_BASE_T4 0x1589 -#define I40E_DEV_ID_VF 0x154C -#define I40E_DEV_ID_VF_HV 0x1571 +#define I40E_DEV_ID_KX_X722 0x37CE +#define I40E_DEV_ID_QSFP_X722 0x37CF #define I40E_DEV_ID_SFP_X722 0x37D0 #define I40E_DEV_ID_1G_BASE_T_X722 0x37D1 #define I40E_DEV_ID_10G_BASE_T_X722 0x37D2 -#define I40E_DEV_ID_X722_VF 0x37CD -#define I40E_DEV_ID_X722_VF_HV 0x37D9 #define i40e_is_40G_device(d) ((d) == I40E_DEV_ID_QSFP_A || \ (d) == I40E_DEV_ID_QSFP_B || \ diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 29d5833e2..784b16594 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2015 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -89,6 +89,9 @@ static const struct i40e_stats i40e_gstrings_misc_stats[] = { I40E_VSI_STAT("rx_unknown_protocol", eth_stats.rx_unknown_protocol), I40E_VSI_STAT("tx_linearize", tx_linearize), I40E_VSI_STAT("tx_force_wb", tx_force_wb), + I40E_VSI_STAT("tx_lost_interrupt", tx_lost_interrupt), + I40E_VSI_STAT("rx_alloc_fail", rx_buf_failed), + I40E_VSI_STAT("rx_pg_alloc_fail", rx_page_failed), }; /* These PF_STATs might look like duplicates of some NETDEV_STATs, @@ -143,6 +146,7 @@ static struct i40e_stats i40e_gstrings_stats[] = { I40E_PF_STAT("rx_oversize", stats.rx_oversize), I40E_PF_STAT("rx_jabber", stats.rx_jabber), I40E_PF_STAT("VF_admin_queue_requests", vf_aq_requests), + I40E_PF_STAT("arq_overflows", arq_overflows), I40E_PF_STAT("rx_hwtstamp_cleared", rx_hwtstamp_cleared), I40E_PF_STAT("fdir_flush_cnt", fd_flush_cnt), I40E_PF_STAT("fdir_atr_match", stats.fd_atr_match), @@ -232,6 +236,7 @@ static const char i40e_priv_flags_strings[][ETH_GSTRING_LEN] = { "flow-director-atr", "veb-stats", "packet-split", + "hw-atr-eviction", }; #define I40E_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40e_priv_flags_strings) @@ -340,7 +345,7 @@ static void i40e_get_settings_link_up(struct i40e_hw *hw, SUPPORTED_1000baseT_Full; if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB) ecmd->advertising |= ADVERTISED_1000baseT_Full; - if (pf->hw.mac.type == I40E_MAC_X722) { + if (pf->flags & I40E_FLAG_100M_SGMII_CAPABLE) { ecmd->supported |= SUPPORTED_100baseT_Full; if (hw_link_info->requested_speeds & I40E_LINK_SPEED_100MB) @@ -411,6 +416,10 @@ static void i40e_get_settings_link_down(struct i40e_hw *hw, if (pf->hw.mac.type == I40E_MAC_X722) { ecmd->supported |= SUPPORTED_100baseT_Full; ecmd->advertising |= ADVERTISED_100baseT_Full; + if (pf->flags & I40E_FLAG_100M_SGMII_CAPABLE) { + ecmd->supported |= SUPPORTED_100baseT_Full; + ecmd->advertising |= ADVERTISED_100baseT_Full; + } } } if (phy_types & I40E_CAP_PHY_TYPE_XAUI || @@ -996,16 +1005,19 @@ static int i40e_get_eeprom(struct net_device *netdev, /* check for NVMUpdate access method */ magic = hw->vendor_id | (hw->device_id << 16); if (eeprom->magic && eeprom->magic != magic) { - struct i40e_nvm_access *cmd; - int errno; + struct i40e_nvm_access *cmd = (struct i40e_nvm_access *)eeprom; + int errno = 0; /* make sure it is the right magic for NVMUpdate */ if ((eeprom->magic >> 16) != hw->device_id) - return -EINVAL; + errno = -EINVAL; + else if (test_bit(__I40E_RESET_RECOVERY_PENDING, &pf->state) || + test_bit(__I40E_RESET_INTR_RECEIVED, &pf->state)) + errno = -EBUSY; + else + ret_val = i40e_nvmupd_command(hw, cmd, bytes, &errno); - cmd = (struct i40e_nvm_access *)eeprom; - ret_val = i40e_nvmupd_command(hw, cmd, bytes, &errno); - if (ret_val && (hw->debug_mask & I40E_DEBUG_NVM)) + if ((errno || ret_val) && (hw->debug_mask & I40E_DEBUG_NVM)) dev_info(&pf->pdev->dev, "NVMUpdate read failed err=%d status=0x%x errno=%d module=%d offset=0x%x size=%d\n", ret_val, hw->aq.asq_last_status, errno, @@ -1089,27 +1101,25 @@ static int i40e_set_eeprom(struct net_device *netdev, struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_hw *hw = &np->vsi->back->hw; struct i40e_pf *pf = np->vsi->back; - struct i40e_nvm_access *cmd; + struct i40e_nvm_access *cmd = (struct i40e_nvm_access *)eeprom; int ret_val = 0; - int errno; + int errno = 0; u32 magic; /* normal ethtool set_eeprom is not supported */ magic = hw->vendor_id | (hw->device_id << 16); if (eeprom->magic == magic) - return -EOPNOTSUPP; - + errno = -EOPNOTSUPP; /* check for NVMUpdate access method */ - if (!eeprom->magic || (eeprom->magic >> 16) != hw->device_id) - return -EINVAL; - - if (test_bit(__I40E_RESET_RECOVERY_PENDING, &pf->state) || - test_bit(__I40E_RESET_INTR_RECEIVED, &pf->state)) - return -EBUSY; + else if (!eeprom->magic || (eeprom->magic >> 16) != hw->device_id) + errno = -EINVAL; + else if (test_bit(__I40E_RESET_RECOVERY_PENDING, &pf->state) || + test_bit(__I40E_RESET_INTR_RECEIVED, &pf->state)) + errno = -EBUSY; + else + ret_val = i40e_nvmupd_command(hw, cmd, bytes, &errno); - cmd = (struct i40e_nvm_access *)eeprom; - ret_val = i40e_nvmupd_command(hw, cmd, bytes, &errno); - if (ret_val && (hw->debug_mask & I40E_DEBUG_NVM)) + if ((errno || ret_val) && (hw->debug_mask & I40E_DEBUG_NVM)) dev_info(&pf->pdev->dev, "NVMUpdate write failed err=%d status=0x%x errno=%d module=%d offset=0x%x size=%d\n", ret_val, hw->aq.asq_last_status, errno, @@ -1816,28 +1826,52 @@ static int i40e_set_phys_id(struct net_device *netdev, enum ethtool_phys_id_state state) { struct i40e_netdev_priv *np = netdev_priv(netdev); + i40e_status ret = 0; struct i40e_pf *pf = np->vsi->back; struct i40e_hw *hw = &pf->hw; int blink_freq = 2; + u16 temp_status; switch (state) { case ETHTOOL_ID_ACTIVE: - pf->led_status = i40e_led_get(hw); + if (!(pf->flags & I40E_FLAG_HAVE_10GBASET_PHY)) { + pf->led_status = i40e_led_get(hw); + } else { + i40e_aq_set_phy_debug(hw, I40E_PHY_DEBUG_PORT, NULL); + ret = i40e_led_get_phy(hw, &temp_status, + &pf->phy_led_val); + pf->led_status = temp_status; + } return blink_freq; case ETHTOOL_ID_ON: - i40e_led_set(hw, 0xF, false); + if (!(pf->flags & I40E_FLAG_HAVE_10GBASET_PHY)) + i40e_led_set(hw, 0xf, false); + else + ret = i40e_led_set_phy(hw, true, pf->led_status, 0); break; case ETHTOOL_ID_OFF: - i40e_led_set(hw, 0x0, false); + if (!(pf->flags & I40E_FLAG_HAVE_10GBASET_PHY)) + i40e_led_set(hw, 0x0, false); + else + ret = i40e_led_set_phy(hw, false, pf->led_status, 0); break; case ETHTOOL_ID_INACTIVE: - i40e_led_set(hw, pf->led_status, false); + if (!(pf->flags & I40E_FLAG_HAVE_10GBASET_PHY)) { + i40e_led_set(hw, false, pf->led_status); + } else { + ret = i40e_led_set_phy(hw, false, pf->led_status, + (pf->phy_led_val | + I40E_PHY_LED_MODE_ORIG)); + i40e_aq_set_phy_debug(hw, 0, NULL); + } break; default: break; } - - return 0; + if (ret) + return -ENOENT; + else + return 0; } /* NOTE: i40e hardware uses a conversion factor of 2 for Interrupt @@ -1845,8 +1879,9 @@ static int i40e_set_phys_id(struct net_device *netdev, * 125us (8000 interrupts per second) == ITR(62) */ -static int i40e_get_coalesce(struct net_device *netdev, - struct ethtool_coalesce *ec) +static int __i40e_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec, + int queue) { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; @@ -1854,14 +1889,24 @@ static int i40e_get_coalesce(struct net_device *netdev, ec->tx_max_coalesced_frames_irq = vsi->work_limit; ec->rx_max_coalesced_frames_irq = vsi->work_limit; - if (ITR_IS_DYNAMIC(vsi->rx_itr_setting)) + /* rx and tx usecs has per queue value. If user doesn't specify the queue, + * return queue 0's value to represent. + */ + if (queue < 0) { + queue = 0; + } else if (queue >= vsi->num_queue_pairs) { + return -EINVAL; + } + + if (ITR_IS_DYNAMIC(vsi->rx_rings[queue]->rx_itr_setting)) ec->use_adaptive_rx_coalesce = 1; - if (ITR_IS_DYNAMIC(vsi->tx_itr_setting)) + if (ITR_IS_DYNAMIC(vsi->tx_rings[queue]->tx_itr_setting)) ec->use_adaptive_tx_coalesce = 1; - ec->rx_coalesce_usecs = vsi->rx_itr_setting & ~I40E_ITR_DYNAMIC; - ec->tx_coalesce_usecs = vsi->tx_itr_setting & ~I40E_ITR_DYNAMIC; + ec->rx_coalesce_usecs = vsi->rx_rings[queue]->rx_itr_setting & ~I40E_ITR_DYNAMIC; + ec->tx_coalesce_usecs = vsi->tx_rings[queue]->tx_itr_setting & ~I40E_ITR_DYNAMIC; + /* we use the _usecs_high to store/set the interrupt rate limit * that the hardware supports, that almost but not quite * fits the original intent of the ethtool variable, @@ -1874,15 +1919,63 @@ static int i40e_get_coalesce(struct net_device *netdev, return 0; } -static int i40e_set_coalesce(struct net_device *netdev, +static int i40e_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec) { - struct i40e_netdev_priv *np = netdev_priv(netdev); + return __i40e_get_coalesce(netdev, ec, -1); +} + +static int i40e_get_per_queue_coalesce(struct net_device *netdev, u32 queue, + struct ethtool_coalesce *ec) +{ + return __i40e_get_coalesce(netdev, ec, queue); +} + +static void i40e_set_itr_per_queue(struct i40e_vsi *vsi, + struct ethtool_coalesce *ec, + int queue) +{ + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; struct i40e_q_vector *q_vector; + u16 vector, intrl; + + intrl = INTRL_USEC_TO_REG(vsi->int_rate_limit); + + vsi->rx_rings[queue]->rx_itr_setting = ec->rx_coalesce_usecs; + vsi->tx_rings[queue]->tx_itr_setting = ec->tx_coalesce_usecs; + + if (ec->use_adaptive_rx_coalesce) + vsi->rx_rings[queue]->rx_itr_setting |= I40E_ITR_DYNAMIC; + else + vsi->rx_rings[queue]->rx_itr_setting &= ~I40E_ITR_DYNAMIC; + + if (ec->use_adaptive_tx_coalesce) + vsi->tx_rings[queue]->tx_itr_setting |= I40E_ITR_DYNAMIC; + else + vsi->tx_rings[queue]->tx_itr_setting &= ~I40E_ITR_DYNAMIC; + + q_vector = vsi->rx_rings[queue]->q_vector; + q_vector->rx.itr = ITR_TO_REG(vsi->rx_rings[queue]->rx_itr_setting); + vector = vsi->base_vector + q_vector->v_idx; + wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1), q_vector->rx.itr); + + q_vector = vsi->tx_rings[queue]->q_vector; + q_vector->tx.itr = ITR_TO_REG(vsi->tx_rings[queue]->tx_itr_setting); + vector = vsi->base_vector + q_vector->v_idx; + wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1), q_vector->tx.itr); + + wr32(hw, I40E_PFINT_RATEN(vector - 1), intrl); + i40e_flush(hw); +} + +static int __i40e_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec, + int queue) +{ + struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; - struct i40e_hw *hw = &pf->hw; - u16 vector; int i; if (ec->tx_max_coalesced_frames_irq || ec->rx_max_coalesced_frames_irq) @@ -1899,57 +1992,53 @@ static int i40e_set_coalesce(struct net_device *netdev, return -EINVAL; } - vector = vsi->base_vector; - if ((ec->rx_coalesce_usecs >= (I40E_MIN_ITR << 1)) && - (ec->rx_coalesce_usecs <= (I40E_MAX_ITR << 1))) { - vsi->rx_itr_setting = ec->rx_coalesce_usecs; - } else if (ec->rx_coalesce_usecs == 0) { - vsi->rx_itr_setting = ec->rx_coalesce_usecs; + if (ec->rx_coalesce_usecs == 0) { if (ec->use_adaptive_rx_coalesce) netif_info(pf, drv, netdev, "rx-usecs=0, need to disable adaptive-rx for a complete disable\n"); - } else { - netif_info(pf, drv, netdev, "Invalid value, rx-usecs range is 0-8160\n"); - return -EINVAL; + } else if ((ec->rx_coalesce_usecs < (I40E_MIN_ITR << 1)) || + (ec->rx_coalesce_usecs > (I40E_MAX_ITR << 1))) { + netif_info(pf, drv, netdev, "Invalid value, rx-usecs range is 0-8160\n"); + return -EINVAL; } vsi->int_rate_limit = ec->rx_coalesce_usecs_high; - if ((ec->tx_coalesce_usecs >= (I40E_MIN_ITR << 1)) && - (ec->tx_coalesce_usecs <= (I40E_MAX_ITR << 1))) { - vsi->tx_itr_setting = ec->tx_coalesce_usecs; - } else if (ec->tx_coalesce_usecs == 0) { - vsi->tx_itr_setting = ec->tx_coalesce_usecs; + if (ec->tx_coalesce_usecs == 0) { if (ec->use_adaptive_tx_coalesce) netif_info(pf, drv, netdev, "tx-usecs=0, need to disable adaptive-tx for a complete disable\n"); + } else if ((ec->tx_coalesce_usecs < (I40E_MIN_ITR << 1)) || + (ec->tx_coalesce_usecs > (I40E_MAX_ITR << 1))) { + netif_info(pf, drv, netdev, "Invalid value, tx-usecs range is 0-8160\n"); + return -EINVAL; + } + + /* rx and tx usecs has per queue value. If user doesn't specify the queue, + * apply to all queues. + */ + if (queue < 0) { + for (i = 0; i < vsi->num_queue_pairs; i++) + i40e_set_itr_per_queue(vsi, ec, i); + } else if (queue < vsi->num_queue_pairs) { + i40e_set_itr_per_queue(vsi, ec, queue); } else { - netif_info(pf, drv, netdev, - "Invalid value, tx-usecs range is 0-8160\n"); + netif_info(pf, drv, netdev, "Invalid queue value, queue range is 0 - %d\n", + vsi->num_queue_pairs - 1); return -EINVAL; } - if (ec->use_adaptive_rx_coalesce) - vsi->rx_itr_setting |= I40E_ITR_DYNAMIC; - else - vsi->rx_itr_setting &= ~I40E_ITR_DYNAMIC; - - if (ec->use_adaptive_tx_coalesce) - vsi->tx_itr_setting |= I40E_ITR_DYNAMIC; - else - vsi->tx_itr_setting &= ~I40E_ITR_DYNAMIC; - - for (i = 0; i < vsi->num_q_vectors; i++, vector++) { - u16 intrl = INTRL_USEC_TO_REG(vsi->int_rate_limit); + return 0; +} - q_vector = vsi->q_vectors[i]; - q_vector->rx.itr = ITR_TO_REG(vsi->rx_itr_setting); - wr32(hw, I40E_PFINT_ITRN(0, vector - 1), q_vector->rx.itr); - q_vector->tx.itr = ITR_TO_REG(vsi->tx_itr_setting); - wr32(hw, I40E_PFINT_ITRN(1, vector - 1), q_vector->tx.itr); - wr32(hw, I40E_PFINT_RATEN(vector - 1), intrl); - i40e_flush(hw); - } +static int i40e_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec) +{ + return __i40e_set_coalesce(netdev, ec, -1); +} - return 0; +static int i40e_set_per_queue_coalesce(struct net_device *netdev, u32 queue, + struct ethtool_coalesce *ec) +{ + return __i40e_set_coalesce(netdev, ec, queue); } /** @@ -2147,8 +2236,8 @@ static int i40e_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc) { struct i40e_hw *hw = &pf->hw; - u64 hena = (u64)rd32(hw, I40E_PFQF_HENA(0)) | - ((u64)rd32(hw, I40E_PFQF_HENA(1)) << 32); + u64 hena = (u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(0)) | + ((u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1)) << 32); /* RSS does not support anything other than hashing * to queues on src and dst IPs and ports @@ -2166,9 +2255,12 @@ static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc) case TCP_V4_FLOW: switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { case 0: - hena &= ~BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP); - break; + return -EINVAL; case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE) + hena |= + BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK); + hena |= BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP); break; default: @@ -2178,9 +2270,12 @@ static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc) case TCP_V6_FLOW: switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { case 0: - hena &= ~BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_TCP); - break; + return -EINVAL; case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE) + hena |= + BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK); + hena |= BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_TCP); break; default: @@ -2190,10 +2285,13 @@ static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc) case UDP_V4_FLOW: switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { case 0: - hena &= ~(BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_UDP) | - BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV4)); - break; + return -EINVAL; case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE) + hena |= + BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP) | + BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP); + hena |= (BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_UDP) | BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV4)); break; @@ -2204,10 +2302,13 @@ static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc) case UDP_V6_FLOW: switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { case 0: - hena &= ~(BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_UDP) | - BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV6)); - break; + return -EINVAL; case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE) + hena |= + BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP) | + BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP); + hena |= (BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_UDP) | BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV6)); break; @@ -2245,8 +2346,8 @@ static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc) return -EINVAL; } - wr32(hw, I40E_PFQF_HENA(0), (u32)hena); - wr32(hw, I40E_PFQF_HENA(1), (u32)(hena >> 32)); + i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), (u32)hena); + i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), (u32)(hena >> 32)); i40e_flush(hw); /* Save setting for future output/update */ @@ -2712,6 +2813,8 @@ static u32 i40e_get_priv_flags(struct net_device *dev) I40E_PRIV_FLAGS_VEB_STATS : 0; ret_flags |= pf->flags & I40E_FLAG_RX_PS_ENABLED ? I40E_PRIV_FLAGS_PS : 0; + ret_flags |= pf->auto_disable_flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE ? + 0 : I40E_PRIV_FLAGS_HW_ATR_EVICT; return ret_flags; } @@ -2763,10 +2866,21 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags) pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED; } - if (flags & I40E_PRIV_FLAGS_VEB_STATS) + if ((flags & I40E_PRIV_FLAGS_VEB_STATS) && + !(pf->flags & I40E_FLAG_VEB_STATS_ENABLED)) { pf->flags |= I40E_FLAG_VEB_STATS_ENABLED; - else + reset_required = true; + } else if (!(flags & I40E_PRIV_FLAGS_VEB_STATS) && + (pf->flags & I40E_FLAG_VEB_STATS_ENABLED)) { pf->flags &= ~I40E_FLAG_VEB_STATS_ENABLED; + reset_required = true; + } + + if ((flags & I40E_PRIV_FLAGS_HW_ATR_EVICT) && + (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE)) + pf->auto_disable_flags &= ~I40E_FLAG_HW_ATR_EVICT_CAPABLE; + else + pf->auto_disable_flags |= I40E_FLAG_HW_ATR_EVICT_CAPABLE; /* if needed, issue reset to cause things to take effect */ if (reset_required) @@ -2812,6 +2926,8 @@ static const struct ethtool_ops i40e_ethtool_ops = { .get_ts_info = i40e_get_ts_info, .get_priv_flags = i40e_get_priv_flags, .set_priv_flags = i40e_set_priv_flags, + .get_per_queue_coalesce = i40e_get_per_queue_coalesce, + .set_per_queue_coalesce = i40e_set_per_queue_coalesce, }; void i40e_set_ethtool_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/intel/i40e/i40e_fcoe.c b/drivers/net/ethernet/intel/i40e/i40e_fcoe.c index 579a46ca8..8ad162c16 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_fcoe.c +++ b/drivers/net/ethernet/intel/i40e/i40e_fcoe.c @@ -295,11 +295,11 @@ void i40e_init_pf_fcoe(struct i40e_pf *pf) } /* enable FCoE hash filter */ - val = rd32(hw, I40E_PFQF_HENA(1)); + val = i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1)); val |= BIT(I40E_FILTER_PCTYPE_FCOE_OX - 32); val |= BIT(I40E_FILTER_PCTYPE_FCOE_RX - 32); val &= I40E_PFQF_HENA_PTYPE_ENA_MASK; - wr32(hw, I40E_PFQF_HENA(1), val); + i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), val); /* enable flag */ pf->flags |= I40E_FLAG_FCOE_ENABLED; @@ -317,11 +317,11 @@ void i40e_init_pf_fcoe(struct i40e_pf *pf) pf->filter_settings.fcoe_cntx_num = I40E_DMA_CNTX_SIZE_4K; /* Setup max frame with FCoE_MTU plus L2 overheads */ - val = rd32(hw, I40E_GLFCOE_RCTL); + val = i40e_read_rx_ctl(hw, I40E_GLFCOE_RCTL); val &= ~I40E_GLFCOE_RCTL_MAX_SIZE_MASK; val |= ((FCOE_MTU + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN) << I40E_GLFCOE_RCTL_MAX_SIZE_SHIFT); - wr32(hw, I40E_GLFCOE_RCTL, val); + i40e_write_rx_ctl(hw, I40E_GLFCOE_RCTL, val); dev_info(&pf->pdev->dev, "FCoE is supported.\n"); } @@ -1359,16 +1359,32 @@ static netdev_tx_t i40e_fcoe_xmit_frame(struct sk_buff *skb, struct i40e_ring *tx_ring = vsi->tx_rings[skb->queue_mapping]; struct i40e_tx_buffer *first; u32 tx_flags = 0; + int fso, count; u8 hdr_len = 0; u8 sof = 0; u8 eof = 0; - int fso; if (i40e_fcoe_set_skb_header(skb)) goto out_drop; - if (!i40e_xmit_descriptor_count(skb, tx_ring)) + count = i40e_xmit_descriptor_count(skb); + if (i40e_chk_linearize(skb, count)) { + if (__skb_linearize(skb)) + goto out_drop; + count = TXD_USE_COUNT(skb->len); + tx_ring->tx_stats.tx_linearize++; + } + + /* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD, + * + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD, + * + 4 desc gap to avoid the cache line where head is, + * + 1 desc for context descriptor, + * otherwise try next time + */ + if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) { + tx_ring->tx_stats.tx_busy++; return NETDEV_TX_BUSY; + } /* prepare the xmit flags */ if (i40e_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags)) @@ -1457,7 +1473,7 @@ static const struct net_device_ops i40e_fcoe_netdev_ops = { .ndo_tx_timeout = i40e_tx_timeout, .ndo_vlan_rx_add_vid = i40e_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = i40e_vlan_rx_kill_vid, - .ndo_setup_tc = i40e_setup_tc, + .ndo_setup_tc = __i40e_setup_tc, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = i40e_netpoll, diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 8f3b53e0d..344912957 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2015 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -28,11 +28,6 @@ #include <linux/of_net.h> #include <linux/pci.h> -#ifdef CONFIG_SPARC -#include <asm/idprom.h> -#include <asm/prom.h> -#endif - /* Local includes */ #include "i40e.h" #include "i40e_diag.h" @@ -51,7 +46,7 @@ static const char i40e_driver_string[] = #define DRV_VERSION_MAJOR 1 #define DRV_VERSION_MINOR 4 -#define DRV_VERSION_BUILD 8 +#define DRV_VERSION_BUILD 25 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \ __stringify(DRV_VERSION_MINOR) "." \ __stringify(DRV_VERSION_BUILD) DRV_KERN @@ -90,6 +85,8 @@ static const struct pci_device_id i40e_pci_tbl[] = { {PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T4), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_20G_KR2), 0}, + {PCI_VDEVICE(INTEL, I40E_DEV_ID_KX_X722), 0}, + {PCI_VDEVICE(INTEL, I40E_DEV_ID_QSFP_X722), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_SFP_X722), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_1G_BASE_T_X722), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T_X722), 0}, @@ -110,6 +107,8 @@ MODULE_DESCRIPTION("Intel(R) Ethernet Connection XL710 Network Driver"); MODULE_LICENSE("GPL"); MODULE_VERSION(DRV_VERSION); +static struct workqueue_struct *i40e_wq; + /** * i40e_allocate_dma_mem_d - OS specific memory alloc for shared code * @hw: pointer to the HW structure @@ -290,12 +289,12 @@ struct i40e_vsi *i40e_find_vsi_from_id(struct i40e_pf *pf, u16 id) * * If not already scheduled, this puts the task into the work queue **/ -static void i40e_service_event_schedule(struct i40e_pf *pf) +void i40e_service_event_schedule(struct i40e_pf *pf) { if (!test_bit(__I40E_DOWN, &pf->state) && !test_bit(__I40E_RESET_RECOVERY_PENDING, &pf->state) && !test_and_set_bit(__I40E_SERVICE_SCHED, &pf->state)) - schedule_work(&pf->service_task); + queue_work(i40e_wq, &pf->service_task); } /** @@ -769,7 +768,7 @@ static void i40e_update_fcoe_stats(struct i40e_vsi *vsi) if (vsi->type != I40E_VSI_FCOE) return; - idx = (pf->pf_seid - I40E_BASE_PF_SEID) + I40E_FCOE_PF_STAT_OFFSET; + idx = hw->pf_id + I40E_FCOE_PF_STAT_OFFSET; fs = &vsi->fcoe_stats; ofs = &vsi->fcoe_stats_offsets; @@ -820,6 +819,7 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) struct i40e_eth_stats *oes; struct i40e_eth_stats *es; /* device's eth stats */ u32 tx_restart, tx_busy; + u64 tx_lost_interrupt; struct i40e_ring *p; u32 rx_page, rx_buf; u64 bytes, packets; @@ -845,6 +845,7 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) rx_b = rx_p = 0; tx_b = tx_p = 0; tx_restart = tx_busy = tx_linearize = tx_force_wb = 0; + tx_lost_interrupt = 0; rx_page = 0; rx_buf = 0; rcu_read_lock(); @@ -863,6 +864,7 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) tx_busy += p->tx_stats.tx_busy; tx_linearize += p->tx_stats.tx_linearize; tx_force_wb += p->tx_stats.tx_force_wb; + tx_lost_interrupt += p->tx_stats.tx_lost_interrupt; /* Rx queue is part of the same block as Tx queue */ p = &p[1]; @@ -881,6 +883,7 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) vsi->tx_busy = tx_busy; vsi->tx_linearize = tx_linearize; vsi->tx_force_wb = tx_force_wb; + vsi->tx_lost_interrupt = tx_lost_interrupt; vsi->rx_page_failed = rx_page; vsi->rx_buf_failed = rx_buf; @@ -1368,7 +1371,7 @@ struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi, f->changed = true; INIT_LIST_HEAD(&f->list); - list_add(&f->list, &vsi->mac_filter_list); + list_add_tail(&f->list, &vsi->mac_filter_list); } /* increment counter and add a new flag if needed */ @@ -1538,7 +1541,11 @@ static int i40e_set_mac(struct net_device *netdev, void *p) ether_addr_copy(netdev->dev_addr, addr->sa_data); - return i40e_sync_vsi_filters(vsi); + /* schedule our worker thread which will take care of + * applying the new filter changes + */ + i40e_service_event_schedule(vsi->back); + return 0; } /** @@ -1762,6 +1769,11 @@ bottom_of_search_loop: vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED; vsi->back->flags |= I40E_FLAG_FILTER_SYNC; } + + /* schedule our worker thread which will take care of + * applying the new filter changes + */ + i40e_service_event_schedule(vsi->back); } /** @@ -1933,7 +1945,7 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) sizeof(struct i40e_aqc_remove_macvlan_element_data); del_list_size = filter_list_len * sizeof(struct i40e_aqc_remove_macvlan_element_data); - del_list = kzalloc(del_list_size, GFP_KERNEL); + del_list = kzalloc(del_list_size, GFP_ATOMIC); if (!del_list) { i40e_cleanup_add_list(&tmp_add_list); @@ -2011,7 +2023,7 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) sizeof(struct i40e_aqc_add_macvlan_element_data), add_list_size = filter_list_len * sizeof(struct i40e_aqc_add_macvlan_element_data); - add_list = kzalloc(add_list_size, GFP_KERNEL); + add_list = kzalloc(add_list_size, GFP_ATOMIC); if (!add_list) { /* Purge element from temporary lists */ i40e_cleanup_add_list(&tmp_add_list); @@ -2110,7 +2122,9 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) cur_promisc = (!!(vsi->current_netdev_flags & IFF_PROMISC) || test_bit(__I40E_FILTER_OVERFLOW_PROMISC, &vsi->state)); - if (vsi->type == I40E_VSI_MAIN && pf->lan_veb != I40E_NO_VEB) { + if ((vsi->type == I40E_VSI_MAIN) && + (pf->lan_veb != I40E_NO_VEB) && + !(pf->flags & I40E_FLAG_MFP_ENABLED)) { /* set defport ON for Main VSI instead of true promisc * this way we will get all unicast/multicast and VLAN * promisc behavior but will not get VF or VMDq traffic @@ -2160,6 +2174,10 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) } } out: + /* if something went wrong then set the changed flag so we try again */ + if (retval) + vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED; + clear_bit(__I40E_CONFIG_BUSY, &vsi->state); return retval; } @@ -2212,7 +2230,7 @@ static int i40e_change_mtu(struct net_device *netdev, int new_mtu) netdev->mtu = new_mtu; if (netif_running(netdev)) i40e_vsi_reinit_locked(vsi); - + i40e_notify_client_of_l2_param_changes(vsi); return 0; } @@ -3106,11 +3124,11 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi) struct i40e_q_vector *q_vector = vsi->q_vectors[i]; q_vector->itr_countdown = ITR_COUNTDOWN_START; - q_vector->rx.itr = ITR_TO_REG(vsi->rx_itr_setting); + q_vector->rx.itr = ITR_TO_REG(vsi->rx_rings[i]->rx_itr_setting); q_vector->rx.latency_range = I40E_LOW_LATENCY; wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1), q_vector->rx.itr); - q_vector->tx.itr = ITR_TO_REG(vsi->tx_itr_setting); + q_vector->tx.itr = ITR_TO_REG(vsi->tx_rings[i]->tx_itr_setting); q_vector->tx.latency_range = I40E_LOW_LATENCY; wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1), q_vector->tx.itr); @@ -3202,10 +3220,10 @@ static void i40e_configure_msi_and_legacy(struct i40e_vsi *vsi) /* set the ITR configuration */ q_vector->itr_countdown = ITR_COUNTDOWN_START; - q_vector->rx.itr = ITR_TO_REG(vsi->rx_itr_setting); + q_vector->rx.itr = ITR_TO_REG(vsi->rx_rings[0]->rx_itr_setting); q_vector->rx.latency_range = I40E_LOW_LATENCY; wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), q_vector->rx.itr); - q_vector->tx.itr = ITR_TO_REG(vsi->tx_itr_setting); + q_vector->tx.itr = ITR_TO_REG(vsi->tx_rings[0]->tx_itr_setting); q_vector->tx.latency_range = I40E_LOW_LATENCY; wr32(hw, I40E_PFINT_ITR0(I40E_TX_ITR), q_vector->tx.itr); @@ -3245,36 +3263,21 @@ void i40e_irq_dynamic_disable_icr0(struct i40e_pf *pf) /** * i40e_irq_dynamic_enable_icr0 - Enable default interrupt generation for icr0 * @pf: board private structure + * @clearpba: true when all pending interrupt events should be cleared **/ -void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf) +void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf, bool clearpba) { struct i40e_hw *hw = &pf->hw; u32 val; val = I40E_PFINT_DYN_CTL0_INTENA_MASK | - I40E_PFINT_DYN_CTL0_CLEARPBA_MASK | + (clearpba ? I40E_PFINT_DYN_CTL0_CLEARPBA_MASK : 0) | (I40E_ITR_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT); wr32(hw, I40E_PFINT_DYN_CTL0, val); i40e_flush(hw); } -/** - * i40e_irq_dynamic_disable - Disable default interrupt generation settings - * @vsi: pointer to a vsi - * @vector: disable a particular Hw Interrupt vector - **/ -void i40e_irq_dynamic_disable(struct i40e_vsi *vsi, int vector) -{ - struct i40e_pf *pf = vsi->back; - struct i40e_hw *hw = &pf->hw; - u32 val; - - val = I40E_ITR_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT; - wr32(hw, I40E_PFINT_DYN_CTLN(vector - 1), val); - i40e_flush(hw); -} - /** * i40e_msix_clean_rings - MSIX mode Interrupt Handler * @irq: interrupt number @@ -3400,7 +3403,7 @@ static int i40e_vsi_enable_irq(struct i40e_vsi *vsi) for (i = 0; i < vsi->num_q_vectors; i++) i40e_irq_dynamic_enable(vsi, i); } else { - i40e_irq_dynamic_enable_icr0(pf); + i40e_irq_dynamic_enable_icr0(pf, true); } i40e_flush(&pf->hw); @@ -3459,16 +3462,12 @@ static irqreturn_t i40e_intr(int irq, void *data) struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; struct i40e_q_vector *q_vector = vsi->q_vectors[0]; - /* temporarily disable queue cause for NAPI processing */ - u32 qval = rd32(hw, I40E_QINT_RQCTL(0)); - - qval &= ~I40E_QINT_RQCTL_CAUSE_ENA_MASK; - wr32(hw, I40E_QINT_RQCTL(0), qval); - - qval = rd32(hw, I40E_QINT_TQCTL(0)); - qval &= ~I40E_QINT_TQCTL_CAUSE_ENA_MASK; - wr32(hw, I40E_QINT_TQCTL(0), qval); - + /* We do not have a way to disarm Queue causes while leaving + * interrupt enabled for all other causes, ideally + * interrupt should be disabled while we are in NAPI but + * this is not a performance path and napi_schedule() + * can deal with rescheduling. + */ if (!test_bit(__I40E_DOWN, &pf->state)) napi_schedule_irqoff(&q_vector->napi); } @@ -3476,6 +3475,7 @@ static irqreturn_t i40e_intr(int irq, void *data) if (icr0 & I40E_PFINT_ICR0_ADMINQ_MASK) { ena_mask &= ~I40E_PFINT_ICR0_ENA_ADMINQ_MASK; set_bit(__I40E_ADMINQ_EVENT_PENDING, &pf->state); + i40e_debug(&pf->hw, I40E_DEBUG_NVM, "AdminQ event\n"); } if (icr0 & I40E_PFINT_ICR0_MAL_DETECT_MASK) { @@ -3546,7 +3546,7 @@ enable_intr: wr32(hw, I40E_PFINT_ICR0_ENA, ena_mask); if (!test_bit(__I40E_DOWN, &pf->state)) { i40e_service_event_schedule(pf); - i40e_irq_dynamic_enable_icr0(pf); + i40e_irq_dynamic_enable_icr0(pf, false); } return ret; @@ -3750,7 +3750,7 @@ static int i40e_vsi_request_irq(struct i40e_vsi *vsi, char *basename) #ifdef CONFIG_NET_POLL_CONTROLLER /** - * i40e_netpoll - A Polling 'interrupt'handler + * i40e_netpoll - A Polling 'interrupt' handler * @netdev: network interface device structure * * This is used by netconsole to send skbs without having to re-enable @@ -3929,6 +3929,9 @@ static int i40e_vsi_control_rx(struct i40e_vsi *vsi, bool enable) else rx_reg &= ~I40E_QRX_ENA_QENA_REQ_MASK; wr32(hw, I40E_QRX_ENA(pf_q), rx_reg); + /* No waiting for the Tx queue to disable */ + if (!enable && test_bit(__I40E_PORT_TX_SUSPENDED, &pf->state)) + continue; /* wait for the change to finish */ ret = i40e_pf_rxq_wait(pf, pf_q, enable); @@ -4166,6 +4169,9 @@ static void i40e_clear_interrupt_scheme(struct i40e_pf *pf) free_irq(pf->msix_entries[0].vector, pf); } + i40e_put_lump(pf->irq_pile, pf->iwarp_base_vector, + I40E_IWARP_IRQ_PILE_ID); + i40e_put_lump(pf->irq_pile, 0, I40E_PILE_VALID_BIT-1); for (i = 0; i < pf->num_alloc_vsi; i++) if (pf->vsi[i]) @@ -4209,12 +4215,17 @@ static void i40e_napi_disable_all(struct i40e_vsi *vsi) **/ static void i40e_vsi_close(struct i40e_vsi *vsi) { + bool reset = false; + if (!test_and_set_bit(__I40E_DOWN, &vsi->state)) i40e_down(vsi); i40e_vsi_free_irq(vsi); i40e_vsi_free_tx_resources(vsi); i40e_vsi_free_rx_resources(vsi); vsi->current_netdev_flags = 0; + if (test_bit(__I40E_RESET_RECOVERY_PENDING, &vsi->back->state)) + reset = true; + i40e_notify_client_of_netdev_close(vsi, reset); } /** @@ -4287,12 +4298,12 @@ static void i40e_pf_unquiesce_all_vsi(struct i40e_pf *pf) #ifdef CONFIG_I40E_DCB /** - * i40e_vsi_wait_txq_disabled - Wait for VSI's queues to be disabled + * i40e_vsi_wait_queues_disabled - Wait for VSI's queues to be disabled * @vsi: the VSI being configured * - * This function waits for the given VSI's Tx queues to be disabled. + * This function waits for the given VSI's queues to be disabled. **/ -static int i40e_vsi_wait_txq_disabled(struct i40e_vsi *vsi) +static int i40e_vsi_wait_queues_disabled(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; int i, pf_q, ret; @@ -4309,24 +4320,36 @@ static int i40e_vsi_wait_txq_disabled(struct i40e_vsi *vsi) } } + pf_q = vsi->base_queue; + for (i = 0; i < vsi->num_queue_pairs; i++, pf_q++) { + /* Check and wait for the disable status of the queue */ + ret = i40e_pf_rxq_wait(pf, pf_q, false); + if (ret) { + dev_info(&pf->pdev->dev, + "VSI seid %d Rx ring %d disable timeout\n", + vsi->seid, pf_q); + return ret; + } + } + return 0; } /** - * i40e_pf_wait_txq_disabled - Wait for all queues of PF VSIs to be disabled + * i40e_pf_wait_queues_disabled - Wait for all queues of PF VSIs to be disabled * @pf: the PF * - * This function waits for the Tx queues to be in disabled state for all the + * This function waits for the queues to be in disabled state for all the * VSIs that are managed by this PF. **/ -static int i40e_pf_wait_txq_disabled(struct i40e_pf *pf) +static int i40e_pf_wait_queues_disabled(struct i40e_pf *pf) { int v, ret = 0; for (v = 0; v < pf->hw.func_caps.num_vsis; v++) { /* No need to wait for FCoE VSI queues */ if (pf->vsi[v] && pf->vsi[v]->type != I40E_VSI_FCOE) { - ret = i40e_vsi_wait_txq_disabled(pf->vsi[v]); + ret = i40e_vsi_wait_queues_disabled(pf->vsi[v]); if (ret) break; } @@ -4352,7 +4375,7 @@ static void i40e_detect_recover_hung_queue(int q_idx, struct i40e_vsi *vsi) { struct i40e_ring *tx_ring = NULL; struct i40e_pf *pf; - u32 head, val, tx_pending; + u32 head, val, tx_pending_hw; int i; pf = vsi->back; @@ -4378,16 +4401,9 @@ static void i40e_detect_recover_hung_queue(int q_idx, struct i40e_vsi *vsi) else val = rd32(&pf->hw, I40E_PFINT_DYN_CTL0); - /* Bail out if interrupts are disabled because napi_poll - * execution in-progress or will get scheduled soon. - * napi_poll cleans TX and RX queues and updates 'next_to_clean'. - */ - if (!(val & I40E_PFINT_DYN_CTLN_INTENA_MASK)) - return; - head = i40e_get_head(tx_ring); - tx_pending = i40e_get_tx_pending(tx_ring); + tx_pending_hw = i40e_get_tx_pending(tx_ring, false); /* HW is done executing descriptors, updated HEAD write back, * but SW hasn't processed those descriptors. If interrupt is @@ -4395,12 +4411,12 @@ static void i40e_detect_recover_hung_queue(int q_idx, struct i40e_vsi *vsi) * dev_watchdog detecting timeout on those netdev_queue, * hence proactively trigger SW interrupt. */ - if (tx_pending) { + if (tx_pending_hw && (!(val & I40E_PFINT_DYN_CTLN_INTENA_MASK))) { /* NAPI Poll didn't run and clear since it was set */ if (test_and_clear_bit(I40E_Q_VECTOR_HUNG_DETECT, &tx_ring->q_vector->hung_detected)) { - netdev_info(vsi->netdev, "VSI_seid %d, Hung TX queue %d, tx_pending: %d, NTC:0x%x, HWB: 0x%x, NTU: 0x%x, TAIL: 0x%x\n", - vsi->seid, q_idx, tx_pending, + netdev_info(vsi->netdev, "VSI_seid %d, Hung TX queue %d, tx_pending_hw: %d, NTC:0x%x, HWB: 0x%x, NTU: 0x%x, TAIL: 0x%x\n", + vsi->seid, q_idx, tx_pending_hw, tx_ring->next_to_clean, head, tx_ring->next_to_use, readl(tx_ring->tail)); @@ -4413,6 +4429,17 @@ static void i40e_detect_recover_hung_queue(int q_idx, struct i40e_vsi *vsi) &tx_ring->q_vector->hung_detected); } } + + /* This is the case where we have interrupts missing, + * so the tx_pending in HW will most likely be 0, but we + * will have tx_pending in SW since the WB happened but the + * interrupt got lost. + */ + if ((!tx_pending_hw) && i40e_get_tx_pending(tx_ring, true) && + (!(val & I40E_PFINT_DYN_CTLN_INTENA_MASK))) { + if (napi_reschedule(&tx_ring->q_vector->napi)) + tx_ring->tx_stats.tx_lost_interrupt++; + } } /** @@ -4831,6 +4858,12 @@ static int i40e_vsi_config_tc(struct i40e_vsi *vsi, u8 enabled_tc) ctxt.info = vsi->info; i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, false); + if (vsi->back->flags & I40E_FLAG_IWARP_ENABLED) { + ctxt.info.valid_sections |= + cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID); + ctxt.info.queueing_opt_flags |= I40E_AQ_VSI_QUE_OPT_TCP_ENA; + } + /* Update the VSI after updating the VSI queue-mapping information */ ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL); if (ret) { @@ -4974,6 +5007,7 @@ static void i40e_dcb_reconfigure(struct i40e_pf *pf) if (pf->vsi[v]->netdev) i40e_dcbnl_set_all(pf->vsi[v]); } + i40e_notify_client_of_l2_param_changes(pf->vsi[v]); } } @@ -5016,8 +5050,7 @@ static int i40e_init_pf_dcb(struct i40e_pf *pf) int err = 0; /* Do not enable DCB for SW1 and SW2 images even if the FW is capable */ - if (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 33)) || - (pf->hw.aq.fw_maj_ver < 4)) + if (pf->flags & I40E_FLAG_NO_DCB_SUPPORT) goto out; /* Get the initial DCB configuration */ @@ -5173,6 +5206,11 @@ static int i40e_up_complete(struct i40e_vsi *vsi) } i40e_fdir_filter_restore(vsi); } + + /* On the next run of the service_task, notify any clients of the new + * opened netdev + */ + pf->flags |= I40E_FLAG_SERVICE_CLIENT_REQUESTED; i40e_service_event_schedule(pf); return 0; @@ -5249,11 +5287,7 @@ void i40e_down(struct i40e_vsi *vsi) * @netdev: net device to configure * @tc: number of traffic classes to enable **/ -#ifdef I40E_FCOE -int i40e_setup_tc(struct net_device *netdev, u8 tc) -#else static int i40e_setup_tc(struct net_device *netdev, u8 tc) -#endif { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; @@ -5306,6 +5340,19 @@ exit: return ret; } +#ifdef I40E_FCOE +int __i40e_setup_tc(struct net_device *netdev, u32 handle, __be16 proto, + struct tc_to_netdev *tc) +#else +static int __i40e_setup_tc(struct net_device *netdev, u32 handle, __be16 proto, + struct tc_to_netdev *tc) +#endif +{ + if (handle != TC_H_ROOT || tc->type != TC_SETUP_MQPRIO) + return -EINVAL; + return i40e_setup_tc(netdev, tc->tc); +} + /** * i40e_open - Called when a network interface is made active * @netdev: network interface device structure @@ -5348,9 +5395,12 @@ int i40e_open(struct net_device *netdev) vxlan_get_rx_port(netdev); #endif #ifdef CONFIG_I40E_GENEVE - geneve_get_rx_port(netdev); + if (pf->flags & I40E_FLAG_GENEVE_OFFLOAD_CAPABLE) + geneve_get_rx_port(netdev); #endif + i40e_notify_client_of_netdev_open(vsi); + return 0; } @@ -5713,8 +5763,8 @@ static int i40e_handle_lldp_event(struct i40e_pf *pf, if (ret) goto exit; - /* Wait for the PF's Tx queues to be disabled */ - ret = i40e_pf_wait_txq_disabled(pf); + /* Wait for the PF's queues to be disabled */ + ret = i40e_pf_wait_queues_disabled(pf); if (ret) { /* Schedule PF reset to recover */ set_bit(__I40E_PF_RESET_REQUESTED, &pf->state); @@ -6015,6 +6065,7 @@ static void i40e_vsi_link_event(struct i40e_vsi *vsi, bool link_up) case I40E_VSI_SRIOV: case I40E_VSI_VMDQ2: case I40E_VSI_CTRL: + case I40E_VSI_IWARP: case I40E_VSI_MIRROR: default: /* there is no notification for other VSIs */ @@ -6244,6 +6295,7 @@ static void i40e_clean_adminq_subtask(struct i40e_pf *pf) if (hw->debug_mask & I40E_DEBUG_AQ) dev_info(&pf->pdev->dev, "ARQ Overflow Error detected\n"); val &= ~I40E_PF_ARQLEN_ARQOVFL_MASK; + pf->arq_overflows++; } if (val & I40E_PF_ARQLEN_ARQCRIT_MASK) { if (hw->debug_mask & I40E_DEBUG_AQ) @@ -6319,7 +6371,9 @@ static void i40e_clean_adminq_subtask(struct i40e_pf *pf) case i40e_aqc_opc_nvm_erase: case i40e_aqc_opc_nvm_update: case i40e_aqc_opc_oem_post_update: - i40e_debug(&pf->hw, I40E_DEBUG_NVM, "ARQ NVM operation completed\n"); + i40e_debug(&pf->hw, I40E_DEBUG_NVM, + "ARQ NVM operation 0x%04x completed\n", + opcode); break; default: dev_info(&pf->pdev->dev, @@ -6803,12 +6857,12 @@ static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit) if (ret) goto end_core_reset; - /* driver is only interested in link up/down and module qualification - * reports from firmware + /* The driver only wants link up/down and module qualification + * reports from firmware. Note the negative logic. */ ret = i40e_aq_set_phy_int_mask(&pf->hw, - I40E_AQ_EVENT_LINK_UPDOWN | - I40E_AQ_EVENT_MODULE_QUAL_FAIL, NULL); + ~(I40E_AQ_EVENT_LINK_UPDOWN | + I40E_AQ_EVENT_MODULE_QUAL_FAIL), NULL); if (ret) dev_info(&pf->pdev->dev, "set phy mask fail, err %s aq_err %s\n", i40e_stat_str(&pf->hw, ret), @@ -6889,8 +6943,7 @@ static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit) wr32(hw, I40E_REG_MSS, val); } - if (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 33)) || - (pf->hw.aq.fw_maj_ver < 4)) { + if (pf->flags & I40E_FLAG_RESTART_AUTONEG) { msleep(75); ret = i40e_aq_set_link_restart_an(&pf->hw, true, NULL); if (ret) @@ -7079,12 +7132,13 @@ static void i40e_sync_udp_filters_subtask(struct i40e_pf *pf) ret = i40e_aq_del_udp_tunnel(hw, i, NULL); if (ret) { - dev_info(&pf->pdev->dev, - "%s vxlan port %d, index %d failed, err %s aq_err %s\n", - port ? "add" : "delete", - ntohs(port), i, - i40e_stat_str(&pf->hw, ret), - i40e_aq_str(&pf->hw, + dev_dbg(&pf->pdev->dev, + "%s %s port %d, index %d failed, err %s aq_err %s\n", + pf->udp_ports[i].type ? "vxlan" : "geneve", + port ? "add" : "delete", + ntohs(port), i, + i40e_stat_str(&pf->hw, ret), + i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); pf->udp_ports[i].index = 0; } @@ -7111,11 +7165,13 @@ static void i40e_service_task(struct work_struct *work) } i40e_detect_recover_hung(pf); + i40e_sync_filters_subtask(pf); i40e_reset_subtask(pf); i40e_handle_mdd_event(pf); i40e_vc_process_vflr_event(pf); i40e_watchdog_subtask(pf); i40e_fdir_reinit_subtask(pf); + i40e_client_subtask(pf); i40e_sync_filters_subtask(pf); i40e_sync_udp_filters_subtask(pf); i40e_clean_adminq_subtask(pf); @@ -7290,8 +7346,6 @@ static int i40e_vsi_mem_alloc(struct i40e_pf *pf, enum i40e_vsi_type type) set_bit(__I40E_DOWN, &vsi->state); vsi->flags = 0; vsi->idx = vsi_idx; - vsi->rx_itr_setting = pf->rx_itr_default; - vsi->tx_itr_setting = pf->tx_itr_default; vsi->int_rate_limit = 0; vsi->rss_table_size = (vsi->type == I40E_VSI_MAIN) ? pf->rss_table_size : 64; @@ -7458,8 +7512,7 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi) tx_ring->dcb_tc = 0; if (vsi->back->flags & I40E_FLAG_WB_ON_ITR_CAPABLE) tx_ring->flags = I40E_TXR_FLAGS_WB_ON_ITR; - if (vsi->back->flags & I40E_FLAG_OUTER_UDP_CSUM_CAPABLE) - tx_ring->flags |= I40E_TXR_FLAGS_OUTER_UDP_CSUM; + tx_ring->tx_itr_setting = pf->tx_itr_default; vsi->tx_rings[i] = tx_ring; rx_ring = &tx_ring[1]; @@ -7476,6 +7529,7 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi) set_ring_16byte_desc_enabled(rx_ring); else clear_ring_16byte_desc_enabled(rx_ring); + rx_ring->rx_itr_setting = pf->rx_itr_default; vsi->rx_rings[i] = rx_ring; } @@ -7520,6 +7574,7 @@ static int i40e_init_msix(struct i40e_pf *pf) int vectors_left; int v_budget, i; int v_actual; + int iwarp_requested = 0; if (!(pf->flags & I40E_FLAG_MSIX_ENABLED)) return -ENODEV; @@ -7533,6 +7588,7 @@ static int i40e_init_msix(struct i40e_pf *pf) * is governed by number of cpus in the system. * - assumes symmetric Tx/Rx pairing * - The number of VMDq pairs + * - The CPU count within the NUMA node if iWARP is enabled #ifdef I40E_FCOE * - The number of FCOE qps. #endif @@ -7579,6 +7635,16 @@ static int i40e_init_msix(struct i40e_pf *pf) } #endif + /* can we reserve enough for iWARP? */ + if (pf->flags & I40E_FLAG_IWARP_ENABLED) { + if (!vectors_left) + pf->num_iwarp_msix = 0; + else if (vectors_left < pf->num_iwarp_msix) + pf->num_iwarp_msix = 1; + v_budget += pf->num_iwarp_msix; + vectors_left -= pf->num_iwarp_msix; + } + /* any vectors left over go for VMDq support */ if (pf->flags & I40E_FLAG_VMDQ_ENABLED) { int vmdq_vecs_wanted = pf->num_vmdq_vsis * pf->num_vmdq_qps; @@ -7613,6 +7679,8 @@ static int i40e_init_msix(struct i40e_pf *pf) * of these features based on the policy and at the end disable * the features that did not get any vectors. */ + iwarp_requested = pf->num_iwarp_msix; + pf->num_iwarp_msix = 0; #ifdef I40E_FCOE pf->num_fcoe_qps = 0; pf->num_fcoe_msix = 0; @@ -7651,17 +7719,33 @@ static int i40e_init_msix(struct i40e_pf *pf) pf->num_lan_msix = 1; break; case 3: + if (pf->flags & I40E_FLAG_IWARP_ENABLED) { + pf->num_lan_msix = 1; + pf->num_iwarp_msix = 1; + } else { + pf->num_lan_msix = 2; + } #ifdef I40E_FCOE /* give one vector to FCoE */ if (pf->flags & I40E_FLAG_FCOE_ENABLED) { pf->num_lan_msix = 1; pf->num_fcoe_msix = 1; } -#else - pf->num_lan_msix = 2; #endif break; default: + if (pf->flags & I40E_FLAG_IWARP_ENABLED) { + pf->num_iwarp_msix = min_t(int, (vec / 3), + iwarp_requested); + pf->num_vmdq_vsis = min_t(int, (vec / 3), + I40E_DEFAULT_NUM_VMDQ_VSI); + } else { + pf->num_vmdq_vsis = min_t(int, (vec / 2), + I40E_DEFAULT_NUM_VMDQ_VSI); + } + pf->num_lan_msix = min_t(int, + (vec - (pf->num_iwarp_msix + pf->num_vmdq_vsis)), + pf->num_lan_msix); #ifdef I40E_FCOE /* give one vector to FCoE */ if (pf->flags & I40E_FLAG_FCOE_ENABLED) { @@ -7669,8 +7753,6 @@ static int i40e_init_msix(struct i40e_pf *pf) vec--; } #endif - /* give the rest to the PF */ - pf->num_lan_msix = min_t(int, vec, pf->num_lan_qps); break; } } @@ -7680,6 +7762,12 @@ static int i40e_init_msix(struct i40e_pf *pf) dev_info(&pf->pdev->dev, "VMDq disabled, not enough MSI-X vectors\n"); pf->flags &= ~I40E_FLAG_VMDQ_ENABLED; } + + if ((pf->flags & I40E_FLAG_IWARP_ENABLED) && + (pf->num_iwarp_msix == 0)) { + dev_info(&pf->pdev->dev, "IWARP disabled, not enough MSI-X vectors\n"); + pf->flags &= ~I40E_FLAG_IWARP_ENABLED; + } #ifdef I40E_FCOE if ((pf->flags & I40E_FLAG_FCOE_ENABLED) && (pf->num_fcoe_msix == 0)) { @@ -7771,6 +7859,7 @@ static int i40e_init_interrupt_scheme(struct i40e_pf *pf) vectors = i40e_init_msix(pf); if (vectors < 0) { pf->flags &= ~(I40E_FLAG_MSIX_ENABLED | + I40E_FLAG_IWARP_ENABLED | #ifdef I40E_FCOE I40E_FLAG_FCOE_ENABLED | #endif @@ -7852,7 +7941,7 @@ static int i40e_setup_misc_vector(struct i40e_pf *pf) i40e_flush(hw); - i40e_irq_dynamic_enable_icr0(pf); + i40e_irq_dynamic_enable_icr0(pf, true); return err; } @@ -7935,6 +8024,52 @@ static int i40e_vsi_config_rss(struct i40e_vsi *vsi) return ret; } +/** + * i40e_get_rss_aq - Get RSS keys and lut by using AQ commands + * @vsi: Pointer to vsi structure + * @seed: Buffter to store the hash keys + * @lut: Buffer to store the lookup table entries + * @lut_size: Size of buffer to store the lookup table entries + * + * Return 0 on success, negative on failure + */ +static int i40e_get_rss_aq(struct i40e_vsi *vsi, const u8 *seed, + u8 *lut, u16 lut_size) +{ + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; + int ret = 0; + + if (seed) { + ret = i40e_aq_get_rss_key(hw, vsi->id, + (struct i40e_aqc_get_set_rss_key_data *)seed); + if (ret) { + dev_info(&pf->pdev->dev, + "Cannot get RSS key, err %s aq_err %s\n", + i40e_stat_str(&pf->hw, ret), + i40e_aq_str(&pf->hw, + pf->hw.aq.asq_last_status)); + return ret; + } + } + + if (lut) { + bool pf_lut = vsi->type == I40E_VSI_MAIN ? true : false; + + ret = i40e_aq_get_rss_lut(hw, vsi->id, pf_lut, lut, lut_size); + if (ret) { + dev_info(&pf->pdev->dev, + "Cannot get RSS lut, err %s aq_err %s\n", + i40e_stat_str(&pf->hw, ret), + i40e_aq_str(&pf->hw, + pf->hw.aq.asq_last_status)); + return ret; + } + } + + return ret; +} + /** * i40e_config_rss_reg - Configure RSS keys and lut by writing registers * @vsi: Pointer to vsi structure @@ -7956,7 +8091,7 @@ static int i40e_config_rss_reg(struct i40e_vsi *vsi, const u8 *seed, u32 *seed_dw = (u32 *)seed; for (i = 0; i <= I40E_PFQF_HKEY_MAX_INDEX; i++) - wr32(hw, I40E_PFQF_HKEY(i), seed_dw[i]); + i40e_write_rx_ctl(hw, I40E_PFQF_HKEY(i), seed_dw[i]); } if (lut) { @@ -7993,7 +8128,7 @@ static int i40e_get_rss_reg(struct i40e_vsi *vsi, u8 *seed, u32 *seed_dw = (u32 *)seed; for (i = 0; i <= I40E_PFQF_HKEY_MAX_INDEX; i++) - seed_dw[i] = rd32(hw, I40E_PFQF_HKEY(i)); + seed_dw[i] = i40e_read_rx_ctl(hw, I40E_PFQF_HKEY(i)); } if (lut) { u32 *lut_dw = (u32 *)lut; @@ -8037,7 +8172,12 @@ int i40e_config_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) */ int i40e_get_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) { - return i40e_get_rss_reg(vsi, seed, lut, lut_size); + struct i40e_pf *pf = vsi->back; + + if (pf->flags & I40E_FLAG_RSS_AQ_CAPABLE) + return i40e_get_rss_aq(vsi, seed, lut, lut_size); + else + return i40e_get_rss_reg(vsi, seed, lut, lut_size); } /** @@ -8071,19 +8211,19 @@ static int i40e_pf_config_rss(struct i40e_pf *pf) int ret; /* By default we enable TCP/UDP with IPv4/IPv6 ptypes */ - hena = (u64)rd32(hw, I40E_PFQF_HENA(0)) | - ((u64)rd32(hw, I40E_PFQF_HENA(1)) << 32); + hena = (u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(0)) | + ((u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1)) << 32); hena |= i40e_pf_get_default_rss_hena(pf); - wr32(hw, I40E_PFQF_HENA(0), (u32)hena); - wr32(hw, I40E_PFQF_HENA(1), (u32)(hena >> 32)); + i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), (u32)hena); + i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), (u32)(hena >> 32)); /* Determine the RSS table size based on the hardware capabilities */ - reg_val = rd32(hw, I40E_PFQF_CTL_0); + reg_val = i40e_read_rx_ctl(hw, I40E_PFQF_CTL_0); reg_val = (pf->rss_table_size == 512) ? (reg_val | I40E_PFQF_CTL_0_HASHLUTSIZE_512) : (reg_val & ~I40E_PFQF_CTL_0_HASHLUTSIZE_512); - wr32(hw, I40E_PFQF_CTL_0, reg_val); + i40e_write_rx_ctl(hw, I40E_PFQF_CTL_0, reg_val); /* Determine the RSS size of the VSI */ if (!vsi->rss_size) @@ -8367,12 +8507,38 @@ static int i40e_sw_init(struct i40e_pf *pf) pf->hw.func_caps.fd_filters_best_effort; } + if (i40e_is_mac_710(&pf->hw) && + (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 33)) || + (pf->hw.aq.fw_maj_ver < 4))) { + pf->flags |= I40E_FLAG_RESTART_AUTONEG; + /* No DCB support for FW < v4.33 */ + pf->flags |= I40E_FLAG_NO_DCB_SUPPORT; + } + + /* Disable FW LLDP if FW < v4.3 */ + if (i40e_is_mac_710(&pf->hw) && + (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 3)) || + (pf->hw.aq.fw_maj_ver < 4))) + pf->flags |= I40E_FLAG_STOP_FW_LLDP; + + /* Use the FW Set LLDP MIB API if FW > v4.40 */ + if (i40e_is_mac_710(&pf->hw) && + (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver >= 40)) || + (pf->hw.aq.fw_maj_ver >= 5))) + pf->flags |= I40E_FLAG_USE_SET_LLDP_MIB; + if (pf->hw.func_caps.vmdq) { pf->num_vmdq_vsis = I40E_DEFAULT_NUM_VMDQ_VSI; pf->flags |= I40E_FLAG_VMDQ_ENABLED; pf->num_vmdq_qps = i40e_default_queues_per_vmdq(pf); } + if (pf->hw.func_caps.iwarp) { + pf->flags |= I40E_FLAG_IWARP_ENABLED; + /* IWARP needs one extra vector for CQP just like MISC.*/ + pf->num_iwarp_msix = (int)num_online_cpus() + 1; + } + #ifdef I40E_FCOE i40e_init_pf_fcoe(pf); @@ -8393,8 +8559,20 @@ static int i40e_sw_init(struct i40e_pf *pf) I40E_FLAG_OUTER_UDP_CSUM_CAPABLE | I40E_FLAG_WB_ON_ITR_CAPABLE | I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE | + I40E_FLAG_NO_PCI_LINK_CHECK | + I40E_FLAG_100M_SGMII_CAPABLE | + I40E_FLAG_USE_SET_LLDP_MIB | I40E_FLAG_GENEVE_OFFLOAD_CAPABLE; + } else if ((pf->hw.aq.api_maj_ver > 1) || + ((pf->hw.aq.api_maj_ver == 1) && + (pf->hw.aq.api_min_ver > 4))) { + /* Supported in FW API version higher than 1.4 */ + pf->flags |= I40E_FLAG_GENEVE_OFFLOAD_CAPABLE; + pf->auto_disable_flags = I40E_FLAG_HW_ATR_EVICT_CAPABLE; + } else { + pf->auto_disable_flags = I40E_FLAG_HW_ATR_EVICT_CAPABLE; } + pf->eeprom_version = 0xDEAD; pf->lan_veb = I40E_NO_VEB; pf->lan_vsi = I40E_NO_VSI; @@ -8530,9 +8708,6 @@ static void i40e_add_vxlan_port(struct net_device *netdev, u8 next_idx; u8 idx; - if (sa_family == AF_INET6) - return; - idx = i40e_get_udp_port_idx(pf, port); /* Check if port already exists */ @@ -8572,9 +8747,6 @@ static void i40e_del_vxlan_port(struct net_device *netdev, struct i40e_pf *pf = vsi->back; u8 idx; - if (sa_family == AF_INET6) - return; - idx = i40e_get_udp_port_idx(pf, port); /* Check if port already exists */ @@ -8608,7 +8780,7 @@ static void i40e_add_geneve_port(struct net_device *netdev, u8 next_idx; u8 idx; - if (sa_family == AF_INET6) + if (!(pf->flags & I40E_FLAG_GENEVE_OFFLOAD_CAPABLE)) return; idx = i40e_get_udp_port_idx(pf, port); @@ -8652,7 +8824,7 @@ static void i40e_del_geneve_port(struct net_device *netdev, struct i40e_pf *pf = vsi->back; u8 idx; - if (sa_family == AF_INET6) + if (!(pf->flags & I40E_FLAG_GENEVE_OFFLOAD_CAPABLE)) return; idx = i40e_get_udp_port_idx(pf, port); @@ -8890,7 +9062,7 @@ static const struct net_device_ops i40e_netdev_ops = { #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = i40e_netpoll, #endif - .ndo_setup_tc = i40e_setup_tc, + .ndo_setup_tc = __i40e_setup_tc, #ifdef I40E_FCOE .ndo_fcoe_enable = i40e_fcoe_enable, .ndo_fcoe_disable = i40e_fcoe_disable, @@ -8942,11 +9114,15 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) np = netdev_priv(netdev); np->vsi = vsi; - netdev->hw_enc_features |= NETIF_F_IP_CSUM | - NETIF_F_RXCSUM | - NETIF_F_GSO_UDP_TUNNEL | - NETIF_F_GSO_GRE | - NETIF_F_TSO; + netdev->hw_enc_features |= NETIF_F_IP_CSUM | + NETIF_F_IPV6_CSUM | + NETIF_F_TSO | + NETIF_F_TSO6 | + NETIF_F_TSO_ECN | + NETIF_F_GSO_GRE | + NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM | + 0; netdev->features = NETIF_F_SG | NETIF_F_IP_CSUM | @@ -8967,6 +9143,8 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) if (!(pf->flags & I40E_FLAG_MFP_ENABLED)) netdev->features |= NETIF_F_NTUPLE; + if (pf->flags & I40E_FLAG_OUTER_UDP_CSUM_CAPABLE) + netdev->features |= NETIF_F_GSO_UDP_TUNNEL_CSUM; /* copy netdev features into list of user selectable features */ netdev->hw_features |= netdev->features; @@ -9216,6 +9394,13 @@ static int i40e_add_vsi(struct i40e_vsi *vsi) cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB); } + if (vsi->back->flags & I40E_FLAG_IWARP_ENABLED) { + ctxt.info.valid_sections |= + cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID); + ctxt.info.queueing_opt_flags |= + I40E_AQ_VSI_QUE_OPT_TCP_ENA; + } + ctxt.info.valid_sections |= cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID); ctxt.info.port_vlan_flags |= I40E_AQ_VSI_PVLAN_MODE_ALL; if (pf->vf[vsi->vf_id].spoofchk) { @@ -9239,6 +9424,10 @@ static int i40e_add_vsi(struct i40e_vsi *vsi) break; #endif /* I40E_FCOE */ + case I40E_VSI_IWARP: + /* send down message to iWARP */ + break; + default: return -ENODEV; } @@ -9471,10 +9660,15 @@ vector_setup_out: **/ static struct i40e_vsi *i40e_vsi_reinit_setup(struct i40e_vsi *vsi) { - struct i40e_pf *pf = vsi->back; + struct i40e_pf *pf; u8 enabled_tc; int ret; + if (!vsi) + return NULL; + + pf = vsi->back; + i40e_put_lump(pf->qp_pile, vsi->base_queue, vsi->idx); i40e_vsi_clear_rings(vsi); @@ -9975,13 +10169,13 @@ static int i40e_add_veb(struct i40e_veb *veb, struct i40e_vsi *vsi) { struct i40e_pf *pf = veb->pf; bool is_default = veb->pf->cur_promisc; - bool is_cloud = false; + bool enable_stats = !!(pf->flags & I40E_FLAG_VEB_STATS_ENABLED); int ret; /* get a VEB from the hardware */ ret = i40e_aq_add_veb(&pf->hw, veb->uplink_seid, vsi->seid, veb->enabled_tc, is_default, - is_cloud, &veb->seid, NULL); + &veb->seid, enable_stats, NULL); if (ret) { dev_info(&pf->pdev->dev, "couldn't add VEB, err %s aq_err %s\n", @@ -10350,6 +10544,7 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf) /* make sure all the fancies are disabled */ pf->flags &= ~(I40E_FLAG_RSS_ENABLED | + I40E_FLAG_IWARP_ENABLED | #ifdef I40E_FCOE I40E_FLAG_FCOE_ENABLED | #endif @@ -10367,6 +10562,7 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf) queues_left -= pf->num_lan_qps; pf->flags &= ~(I40E_FLAG_RSS_ENABLED | + I40E_FLAG_IWARP_ENABLED | #ifdef I40E_FCOE I40E_FLAG_FCOE_ENABLED | #endif @@ -10538,21 +10734,9 @@ static void i40e_print_features(struct i40e_pf *pf) **/ static void i40e_get_platform_mac_addr(struct pci_dev *pdev, struct i40e_pf *pf) { - struct device_node *dp = pci_device_to_OF_node(pdev); - const unsigned char *addr; - u8 *mac_addr = pf->hw.mac.addr; - pf->flags &= ~I40E_FLAG_PF_MAC; - addr = of_get_mac_address(dp); - if (addr) { - ether_addr_copy(mac_addr, addr); + if (!eth_platform_get_mac_address(&pdev->dev, pf->hw.mac.addr)) pf->flags |= I40E_FLAG_PF_MAC; -#ifdef CONFIG_SPARC - } else { - ether_addr_copy(mac_addr, idprom->id_ethaddr); - pf->flags |= I40E_FLAG_PF_MAC; -#endif /* CONFIG_SPARC */ - } } /** @@ -10575,7 +10759,6 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) u16 wol_nvm_bits; u16 link_status; int err; - u32 len; u32 val; u32 i; u8 set_fc_aq_fail; @@ -10758,8 +10941,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) * Ignore error return codes because if it was already disabled via * hardware settings this will fail */ - if (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 3)) || - (pf->hw.aq.fw_maj_ver < 4)) { + if (pf->flags & I40E_FLAG_STOP_FW_LLDP) { dev_info(&pdev->dev, "Stopping firmware LLDP agent.\n"); i40e_aq_stop_lldp(hw, true, NULL); } @@ -10834,8 +11016,8 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pf->num_alloc_vsi = pf->hw.func_caps.num_vsis; /* Set up the *vsi struct and our local tracking of the MAIN PF vsi. */ - len = sizeof(struct i40e_vsi *) * pf->num_alloc_vsi; - pf->vsi = kzalloc(len, GFP_KERNEL); + pf->vsi = kcalloc(pf->num_alloc_vsi, sizeof(struct i40e_vsi *), + GFP_KERNEL); if (!pf->vsi) { err = -ENOMEM; goto err_switch_setup; @@ -10882,12 +11064,12 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } } - /* driver is only interested in link up/down and module qualification - * reports from firmware + /* The driver only wants link up/down and module qualification + * reports from firmware. Note the negative logic. */ err = i40e_aq_set_phy_int_mask(&pf->hw, - I40E_AQ_EVENT_LINK_UPDOWN | - I40E_AQ_EVENT_MODULE_QUAL_FAIL, NULL); + ~(I40E_AQ_EVENT_LINK_UPDOWN | + I40E_AQ_EVENT_MODULE_QUAL_FAIL), NULL); if (err) dev_info(&pf->pdev->dev, "set phy mask fail, err %s aq_err %s\n", i40e_stat_str(&pf->hw, err), @@ -10904,8 +11086,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) wr32(hw, I40E_REG_MSS, val); } - if (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 33)) || - (pf->hw.aq.fw_maj_ver < 4)) { + if (pf->flags & I40E_FLAG_RESTART_AUTONEG) { msleep(75); err = i40e_aq_set_link_restart_an(&pf->hw, true, NULL); if (err) @@ -10939,8 +11120,6 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if ((pf->flags & I40E_FLAG_SRIOV_ENABLED) && (pf->flags & I40E_FLAG_MSIX_ENABLED) && !test_bit(__I40E_BAD_EEPROM, &pf->state)) { - u32 val; - /* disable link interrupts for VFs */ val = rd32(hw, I40E_PFGEN_PORTMDIO_NUM); val &= ~I40E_PFGEN_PORTMDIO_NUM_VFLINK_STAT_ENA_MASK; @@ -10959,7 +11138,17 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } #endif /* CONFIG_PCI_IOV */ - pfs_found++; + if (pf->flags & I40E_FLAG_IWARP_ENABLED) { + pf->iwarp_base_vector = i40e_get_lump(pf, pf->irq_pile, + pf->num_iwarp_msix, + I40E_IWARP_IRQ_PILE_ID); + if (pf->iwarp_base_vector < 0) { + dev_info(&pdev->dev, + "failed to get tracking for %d vectors for IWARP err=%d\n", + pf->num_iwarp_msix, pf->iwarp_base_vector); + pf->flags &= ~I40E_FLAG_IWARP_ENABLED; + } + } i40e_dbg_pf_init(pf); @@ -10970,6 +11159,12 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) mod_timer(&pf->service_timer, round_jiffies(jiffies + pf->service_timer_period)); + /* add this PF to client device list and launch a client service task */ + err = i40e_lan_add_device(pf); + if (err) + dev_info(&pdev->dev, "Failed to add PF to client API service list: %d\n", + err); + #ifdef I40E_FCOE /* create FCoE interface */ i40e_fcoe_vsi_setup(pf); @@ -11051,6 +11246,10 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) i40e_add_filter_to_drop_tx_flow_control_frames(&pf->hw, pf->main_vsi_seid); + if ((pf->hw.device_id == I40E_DEV_ID_10G_BASE_T) || + (pf->hw.device_id == I40E_DEV_ID_10G_BASE_T4)) + pf->flags |= I40E_FLAG_HAVE_10GBASET_PHY; + /* print a string summarizing features */ i40e_print_features(pf); @@ -11107,10 +11306,11 @@ static void i40e_remove(struct pci_dev *pdev) i40e_ptp_stop(pf); /* Disable RSS in hw */ - wr32(hw, I40E_PFQF_HENA(0), 0); - wr32(hw, I40E_PFQF_HENA(1), 0); + i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), 0); + i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), 0); /* no more scheduling of any task */ + set_bit(__I40E_SUSPENDED, &pf->state); set_bit(__I40E_DOWN, &pf->state); del_timer_sync(&pf->service_timer); cancel_work_sync(&pf->service_task); @@ -11140,9 +11340,16 @@ static void i40e_remove(struct pci_dev *pdev) if (pf->vsi[pf->lan_vsi]) i40e_vsi_release(pf->vsi[pf->lan_vsi]); + /* remove attached clients */ + ret_code = i40e_lan_del_device(pf); + if (ret_code) { + dev_warn(&pdev->dev, "Failed to delete client device: %d\n", + ret_code); + } + /* shutdown and destroy the HMC */ - if (pf->hw.hmc.hmc_obj) { - ret_code = i40e_shutdown_lan_hmc(&pf->hw); + if (hw->hmc.hmc_obj) { + ret_code = i40e_shutdown_lan_hmc(hw); if (ret_code) dev_warn(&pdev->dev, "Failed to destroy the HMC resources: %d\n", @@ -11150,7 +11357,7 @@ static void i40e_remove(struct pci_dev *pdev) } /* shutdown the adminq */ - ret_code = i40e_shutdown_adminq(&pf->hw); + ret_code = i40e_shutdown_adminq(hw); if (ret_code) dev_warn(&pdev->dev, "Failed to destroy the Admin Queue resources: %d\n", @@ -11178,7 +11385,7 @@ static void i40e_remove(struct pci_dev *pdev) kfree(pf->qp_pile); kfree(pf->vsi); - iounmap(pf->hw.hw_addr); + iounmap(hw->hw_addr); kfree(pf); pci_release_selected_regions(pdev, pci_select_bars(pdev, IORESOURCE_MEM)); @@ -11413,6 +11620,16 @@ static int __init i40e_init_module(void) i40e_driver_string, i40e_driver_version_str); pr_info("%s: %s\n", i40e_driver_name, i40e_copyright); + /* we will see if single thread per module is enough for now, + * it can't be any worse than using the system workqueue which + * was already single threaded + */ + i40e_wq = create_singlethread_workqueue(i40e_driver_name); + if (!i40e_wq) { + pr_err("%s: Failed to create workqueue\n", i40e_driver_name); + return -ENOMEM; + } + i40e_dbg_init(); return pci_register_driver(&i40e_driver); } @@ -11427,6 +11644,7 @@ module_init(i40e_init_module); static void __exit i40e_exit_module(void) { pci_unregister_driver(&i40e_driver); + destroy_workqueue(i40e_wq); i40e_dbg_exit(); } module_exit(i40e_exit_module); diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c index 6100cdd9a..5730f8091 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c +++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c @@ -693,10 +693,11 @@ i40e_status i40e_nvmupd_command(struct i40e_hw *hw, /* early check for status command and debug msgs */ upd_cmd = i40e_nvmupd_validate_command(hw, cmd, perrno); - i40e_debug(hw, I40E_DEBUG_NVM, "%s state %d nvm_release_on_hold %d\n", + i40e_debug(hw, I40E_DEBUG_NVM, "%s state %d nvm_release_on_hold %d cmd 0x%08x config 0x%08x offset 0x%08x data_size 0x%08x\n", i40e_nvm_update_state_str[upd_cmd], hw->nvmupd_state, - hw->aq.nvm_release_on_done); + hw->aq.nvm_release_on_done, + cmd->command, cmd->config, cmd->offset, cmd->data_size); if (upd_cmd == I40E_NVMUPD_INVALID) { *perrno = -EFAULT; diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h index bb9d583e5..d51eee5bf 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2015 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -74,6 +74,12 @@ i40e_status i40e_aq_set_rss_key(struct i40e_hw *hw, u32 i40e_led_get(struct i40e_hw *hw); void i40e_led_set(struct i40e_hw *hw, u32 mode, bool blink); +i40e_status i40e_led_set_phy(struct i40e_hw *hw, bool on, + u16 led_addr, u32 mode); +i40e_status i40e_led_get_phy(struct i40e_hw *hw, u16 *led_addr, + u16 *val); +i40e_status i40e_blink_phy_link_led(struct i40e_hw *hw, + u32 time, u32 interval); /* admin send queue commands */ @@ -127,6 +133,9 @@ i40e_status i40e_aq_set_vsi_unicast_promiscuous(struct i40e_hw *hw, u16 vsi_id, bool set, struct i40e_asq_cmd_details *cmd_details); i40e_status i40e_aq_set_vsi_multicast_promiscuous(struct i40e_hw *hw, u16 vsi_id, bool set, struct i40e_asq_cmd_details *cmd_details); +i40e_status i40e_aq_set_vsi_vlan_promisc(struct i40e_hw *hw, + u16 seid, bool enable, + struct i40e_asq_cmd_details *cmd_details); i40e_status i40e_aq_get_vsi_params(struct i40e_hw *hw, struct i40e_vsi_context *vsi_ctx, struct i40e_asq_cmd_details *cmd_details); @@ -135,8 +144,8 @@ i40e_status i40e_aq_update_vsi_params(struct i40e_hw *hw, struct i40e_asq_cmd_details *cmd_details); i40e_status i40e_aq_add_veb(struct i40e_hw *hw, u16 uplink_seid, u16 downlink_seid, u8 enabled_tc, - bool default_port, bool enable_l2_filtering, - u16 *pveb_seid, + bool default_port, u16 *pveb_seid, + bool enable_stats, struct i40e_asq_cmd_details *cmd_details); i40e_status i40e_aq_get_veb_parameters(struct i40e_hw *hw, u16 veb_seid, u16 *switch_id, bool *floating, @@ -149,6 +158,15 @@ i40e_status i40e_aq_add_macvlan(struct i40e_hw *hw, u16 vsi_id, i40e_status i40e_aq_remove_macvlan(struct i40e_hw *hw, u16 vsi_id, struct i40e_aqc_remove_macvlan_element_data *mv_list, u16 count, struct i40e_asq_cmd_details *cmd_details); +i40e_status i40e_aq_add_mirrorrule(struct i40e_hw *hw, u16 sw_seid, + u16 rule_type, u16 dest_vsi, u16 count, __le16 *mr_list, + struct i40e_asq_cmd_details *cmd_details, + u16 *rule_id, u16 *rules_used, u16 *rules_free); +i40e_status i40e_aq_delete_mirrorrule(struct i40e_hw *hw, u16 sw_seid, + u16 rule_type, u16 rule_id, u16 count, __le16 *mr_list, + struct i40e_asq_cmd_details *cmd_details, + u16 *rules_used, u16 *rules_free); + i40e_status i40e_aq_send_msg_to_vf(struct i40e_hw *hw, u16 vfid, u32 v_opcode, u32 v_retval, u8 *msg, u16 msglen, struct i40e_asq_cmd_details *cmd_details); @@ -324,4 +342,19 @@ i40e_status i40e_aq_debug_dump(struct i40e_hw *hw, u8 cluster_id, struct i40e_asq_cmd_details *cmd_details); void i40e_add_filter_to_drop_tx_flow_control_frames(struct i40e_hw *hw, u16 vsi_seid); +i40e_status i40e_aq_rx_ctl_read_register(struct i40e_hw *hw, + u32 reg_addr, u32 *reg_val, + struct i40e_asq_cmd_details *cmd_details); +u32 i40e_read_rx_ctl(struct i40e_hw *hw, u32 reg_addr); +i40e_status i40e_aq_rx_ctl_write_register(struct i40e_hw *hw, + u32 reg_addr, u32 reg_val, + struct i40e_asq_cmd_details *cmd_details); +void i40e_write_rx_ctl(struct i40e_hw *hw, u32 reg_addr, u32 reg_val); +i40e_status i40e_read_phy_register(struct i40e_hw *hw, u8 page, + u16 reg, u8 phy_addr, u16 *value); +i40e_status i40e_write_phy_register(struct i40e_hw *hw, u8 page, + u16 reg, u8 phy_addr, u16 value); +u8 i40e_get_phy_address(struct i40e_hw *hw, u8 dev_num); +i40e_status i40e_blink_phy_link_led(struct i40e_hw *hw, + u32 time, u32 interval); #endif /* _I40E_PROTOTYPE_H_ */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_register.h b/drivers/net/ethernet/intel/i40e/i40e_register.h index dc0402fe3..86ca27f72 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_register.h +++ b/drivers/net/ethernet/intel/i40e/i40e_register.h @@ -2045,6 +2045,14 @@ #define I40E_PRTPM_TLPIC 0x001E43C0 /* Reset: GLOBR */ #define I40E_PRTPM_TLPIC_ETLPIC_SHIFT 0 #define I40E_PRTPM_TLPIC_ETLPIC_MASK I40E_MASK(0xFFFFFFFF, I40E_PRTPM_TLPIC_ETLPIC_SHIFT) +#define I40E_GL_PRS_FVBM(_i) (0x00269760 + ((_i) * 4)) /* _i=0...3 */ /* Reset: CORER */ +#define I40E_GL_PRS_FVBM_MAX_INDEX 3 +#define I40E_GL_PRS_FVBM_FV_BYTE_INDX_SHIFT 0 +#define I40E_GL_PRS_FVBM_FV_BYTE_INDX_MASK I40E_MASK(0x7F, I40E_GL_PRS_FVBM_FV_BYTE_INDX_SHIFT) +#define I40E_GL_PRS_FVBM_RULE_BUS_INDX_SHIFT 8 +#define I40E_GL_PRS_FVBM_RULE_BUS_INDX_MASK I40E_MASK(0x3F, I40E_GL_PRS_FVBM_RULE_BUS_INDX_SHIFT) +#define I40E_GL_PRS_FVBM_MSK_ENA_SHIFT 31 +#define I40E_GL_PRS_FVBM_MSK_ENA_MASK I40E_MASK(0x1, I40E_GL_PRS_FVBM_MSK_ENA_SHIFT) #define I40E_GLRPB_DPSS 0x000AC828 /* Reset: CORER */ #define I40E_GLRPB_DPSS_DPS_TCN_SHIFT 0 #define I40E_GLRPB_DPSS_DPS_TCN_MASK I40E_MASK(0xFFFFF, I40E_GLRPB_DPSS_DPS_TCN_SHIFT) @@ -2216,6 +2224,14 @@ #define I40E_PRTQF_FD_FLXINSET_MAX_INDEX 63 #define I40E_PRTQF_FD_FLXINSET_INSET_SHIFT 0 #define I40E_PRTQF_FD_FLXINSET_INSET_MASK I40E_MASK(0xFF, I40E_PRTQF_FD_FLXINSET_INSET_SHIFT) +#define I40E_PRTQF_FD_INSET(_i, _j) (0x00250000 + ((_i) * 64 + (_j) * 32)) /* _i=0...63, _j=0...1 */ /* Reset: CORER */ +#define I40E_PRTQF_FD_INSET_MAX_INDEX 63 +#define I40E_PRTQF_FD_INSET_INSET_SHIFT 0 +#define I40E_PRTQF_FD_INSET_INSET_MASK I40E_MASK(0xFFFFFFFF, I40E_PRTQF_FD_INSET_INSET_SHIFT) +#define I40E_PRTQF_FD_INSET(_i, _j) (0x00250000 + ((_i) * 64 + (_j) * 32)) /* _i=0...63, _j=0...1 */ /* Reset: CORER */ +#define I40E_PRTQF_FD_INSET_MAX_INDEX 63 +#define I40E_PRTQF_FD_INSET_INSET_SHIFT 0 +#define I40E_PRTQF_FD_INSET_INSET_MASK I40E_MASK(0xFFFFFFFF, I40E_PRTQF_FD_INSET_INSET_SHIFT) #define I40E_PRTQF_FD_MSK(_i, _j) (0x00252000 + ((_i) * 64 + (_j) * 32)) /* _i=0...63, _j=0...1 */ /* Reset: CORER */ #define I40E_PRTQF_FD_MSK_MAX_INDEX 63 #define I40E_PRTQF_FD_MSK_MASK_SHIFT 0 @@ -5155,6 +5171,38 @@ #define I40E_GLQF_FD_PCTYPES_MAX_INDEX 63 #define I40E_GLQF_FD_PCTYPES_FD_PCTYPE_SHIFT 0 #define I40E_GLQF_FD_PCTYPES_FD_PCTYPE_MASK I40E_MASK(0x3F, I40E_GLQF_FD_PCTYPES_FD_PCTYPE_SHIFT) +#define I40E_GLQF_FD_MSK(_i, _j) (0x00267200 + ((_i) * 4 + (_j) * 8)) /* _i=0...1, _j=0...63 */ /* Reset: CORER */ +#define I40E_GLQF_FD_MSK_MAX_INDEX 1 +#define I40E_GLQF_FD_MSK_MASK_SHIFT 0 +#define I40E_GLQF_FD_MSK_MASK_MASK I40E_MASK(0xFFFF, I40E_GLQF_FD_MSK_MASK_SHIFT) +#define I40E_GLQF_FD_MSK_OFFSET_SHIFT 16 +#define I40E_GLQF_FD_MSK_OFFSET_MASK I40E_MASK(0x3F, I40E_GLQF_FD_MSK_OFFSET_SHIFT) +#define I40E_GLQF_HASH_INSET(_i, _j) (0x00267600 + ((_i) * 4 + (_j) * 8)) /* _i=0...1, _j=0...63 */ /* Reset: CORER */ +#define I40E_GLQF_HASH_INSET_MAX_INDEX 1 +#define I40E_GLQF_HASH_INSET_INSET_SHIFT 0 +#define I40E_GLQF_HASH_INSET_INSET_MASK I40E_MASK(0xFFFFFFFF, I40E_GLQF_HASH_INSET_INSET_SHIFT) +#define I40E_GLQF_HASH_MSK(_i, _j) (0x00267A00 + ((_i) * 4 + (_j) * 8)) /* _i=0...1, _j=0...63 */ /* Reset: CORER */ +#define I40E_GLQF_HASH_MSK_MAX_INDEX 1 +#define I40E_GLQF_HASH_MSK_MASK_SHIFT 0 +#define I40E_GLQF_HASH_MSK_MASK_MASK I40E_MASK(0xFFFF, I40E_GLQF_HASH_MSK_MASK_SHIFT) +#define I40E_GLQF_HASH_MSK_OFFSET_SHIFT 16 +#define I40E_GLQF_HASH_MSK_OFFSET_MASK I40E_MASK(0x3F, I40E_GLQF_HASH_MSK_OFFSET_SHIFT) +#define I40E_GLQF_ORT(_i) (0x00268900 + ((_i) * 4)) /* _i=0...63 */ /* Reset: CORER */ +#define I40E_GLQF_ORT_MAX_INDEX 63 +#define I40E_GLQF_ORT_PIT_INDX_SHIFT 0 +#define I40E_GLQF_ORT_PIT_INDX_MASK I40E_MASK(0x1F, I40E_GLQF_ORT_PIT_INDX_SHIFT) +#define I40E_GLQF_ORT_FIELD_CNT_SHIFT 5 +#define I40E_GLQF_ORT_FIELD_CNT_MASK I40E_MASK(0x3, I40E_GLQF_ORT_FIELD_CNT_SHIFT) +#define I40E_GLQF_ORT_FLX_PAYLOAD_SHIFT 7 +#define I40E_GLQF_ORT_FLX_PAYLOAD_MASK I40E_MASK(0x1, I40E_GLQF_ORT_FLX_PAYLOAD_SHIFT) +#define I40E_GLQF_PIT(_i) (0x00268C80 + ((_i) * 4)) /* _i=0...23 */ /* Reset: CORER */ +#define I40E_GLQF_PIT_MAX_INDEX 23 +#define I40E_GLQF_PIT_SOURCE_OFF_SHIFT 0 +#define I40E_GLQF_PIT_SOURCE_OFF_MASK I40E_MASK(0x1F, I40E_GLQF_PIT_SOURCE_OFF_SHIFT) +#define I40E_GLQF_PIT_FSIZE_SHIFT 5 +#define I40E_GLQF_PIT_FSIZE_MASK I40E_MASK(0x1F, I40E_GLQF_PIT_FSIZE_SHIFT) +#define I40E_GLQF_PIT_DEST_OFF_SHIFT 10 +#define I40E_GLQF_PIT_DEST_OFF_MASK I40E_MASK(0x3F, I40E_GLQF_PIT_DEST_OFF_SHIFT) #define I40E_GLQF_FDEVICTENA(_i) (0x00270384 + ((_i) * 4)) /* _i=0...1 */ /* Reset: CORER */ #define I40E_GLQF_FDEVICTENA_MAX_INDEX 1 #define I40E_GLQF_FDEVICTENA_GLQF_FDEVICTENA_SHIFT 0 diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 47bd8b314..6a49b7ae5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2014 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -610,15 +610,19 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring) /** * i40e_get_tx_pending - how many tx descriptors not processed * @tx_ring: the ring of descriptors + * @in_sw: is tx_pending being checked in SW or HW * * Since there is no access to the ring head register * in XL710, we need to use our local copies **/ -u32 i40e_get_tx_pending(struct i40e_ring *ring) +u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw) { u32 head, tail; - head = i40e_get_head(ring); + if (!in_sw) + head = i40e_get_head(ring); + else + head = ring->next_to_clean; tail = readl(ring->tail); if (head != tail) @@ -741,7 +745,7 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget) * them to be written back in case we stay in NAPI. * In this mode on X722 we do not enable Interrupt. */ - j = i40e_get_tx_pending(tx_ring); + j = i40e_get_tx_pending(tx_ring, false); if (budget && ((j / (WB_STRIDE + 1)) == 0) && (j != 0) && @@ -774,29 +778,48 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget) } /** - * i40e_force_wb - Arm hardware to do a wb on noncache aligned descriptors + * i40e_enable_wb_on_itr - Arm hardware to do a wb, interrupts are not enabled * @vsi: the VSI we care about - * @q_vector: the vector on which to force writeback + * @q_vector: the vector on which to enable writeback * **/ -void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector) +static void i40e_enable_wb_on_itr(struct i40e_vsi *vsi, + struct i40e_q_vector *q_vector) { u16 flags = q_vector->tx.ring[0].flags; + u32 val; - if (flags & I40E_TXR_FLAGS_WB_ON_ITR) { - u32 val; + if (!(flags & I40E_TXR_FLAGS_WB_ON_ITR)) + return; - if (q_vector->arm_wb_state) - return; + if (q_vector->arm_wb_state) + return; - val = I40E_PFINT_DYN_CTLN_WB_ON_ITR_MASK; + if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) { + val = I40E_PFINT_DYN_CTLN_WB_ON_ITR_MASK | + I40E_PFINT_DYN_CTLN_ITR_INDX_MASK; /* set noitr */ wr32(&vsi->back->hw, - I40E_PFINT_DYN_CTLN(q_vector->v_idx + - vsi->base_vector - 1), + I40E_PFINT_DYN_CTLN(q_vector->v_idx + vsi->base_vector - 1), val); - q_vector->arm_wb_state = true; - } else if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) { + } else { + val = I40E_PFINT_DYN_CTL0_WB_ON_ITR_MASK | + I40E_PFINT_DYN_CTL0_ITR_INDX_MASK; /* set noitr */ + + wr32(&vsi->back->hw, I40E_PFINT_DYN_CTL0, val); + } + q_vector->arm_wb_state = true; +} + +/** + * i40e_force_wb - Issue SW Interrupt so HW does a wb + * @vsi: the VSI we care about + * @q_vector: the vector on which to force writeback + * + **/ +void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector) +{ + if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) { u32 val = I40E_PFINT_DYN_CTLN_INTENA_MASK | I40E_PFINT_DYN_CTLN_ITR_INDX_MASK | /* set noitr */ I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK | @@ -1041,7 +1064,7 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring) if (rx_bi->page_dma) { dma_unmap_page(dev, rx_bi->page_dma, - PAGE_SIZE / 2, + PAGE_SIZE, DMA_FROM_DEVICE); rx_bi->page_dma = 0; } @@ -1176,16 +1199,19 @@ static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val) * i40e_alloc_rx_buffers_ps - Replace used receive buffers; packet split * @rx_ring: ring to place buffers on * @cleaned_count: number of buffers to replace + * + * Returns true if any errors on allocation **/ -void i40e_alloc_rx_buffers_ps(struct i40e_ring *rx_ring, u16 cleaned_count) +bool i40e_alloc_rx_buffers_ps(struct i40e_ring *rx_ring, u16 cleaned_count) { u16 i = rx_ring->next_to_use; union i40e_rx_desc *rx_desc; struct i40e_rx_buffer *bi; + const int current_node = numa_node_id(); /* do nothing if no valid netdev defined */ if (!rx_ring->netdev || !cleaned_count) - return; + return false; while (cleaned_count--) { rx_desc = I40E_RX_DESC(rx_ring, i); @@ -1193,56 +1219,79 @@ void i40e_alloc_rx_buffers_ps(struct i40e_ring *rx_ring, u16 cleaned_count) if (bi->skb) /* desc is in use */ goto no_buffers; + + /* If we've been moved to a different NUMA node, release the + * page so we can get a new one on the current node. + */ + if (bi->page && page_to_nid(bi->page) != current_node) { + dma_unmap_page(rx_ring->dev, + bi->page_dma, + PAGE_SIZE, + DMA_FROM_DEVICE); + __free_page(bi->page); + bi->page = NULL; + bi->page_dma = 0; + rx_ring->rx_stats.realloc_count++; + } else if (bi->page) { + rx_ring->rx_stats.page_reuse_count++; + } + if (!bi->page) { bi->page = alloc_page(GFP_ATOMIC); if (!bi->page) { rx_ring->rx_stats.alloc_page_failed++; goto no_buffers; } - } - - if (!bi->page_dma) { - /* use a half page if we're re-using */ - bi->page_offset ^= PAGE_SIZE / 2; bi->page_dma = dma_map_page(rx_ring->dev, bi->page, - bi->page_offset, - PAGE_SIZE / 2, + 0, + PAGE_SIZE, DMA_FROM_DEVICE); - if (dma_mapping_error(rx_ring->dev, - bi->page_dma)) { + if (dma_mapping_error(rx_ring->dev, bi->page_dma)) { rx_ring->rx_stats.alloc_page_failed++; + __free_page(bi->page); + bi->page = NULL; bi->page_dma = 0; + bi->page_offset = 0; goto no_buffers; } + bi->page_offset = 0; } - dma_sync_single_range_for_device(rx_ring->dev, - bi->dma, - 0, - rx_ring->rx_hdr_len, - DMA_FROM_DEVICE); /* Refresh the desc even if buffer_addrs didn't change * because each write-back erases this info. */ - rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma); + rx_desc->read.pkt_addr = + cpu_to_le64(bi->page_dma + bi->page_offset); rx_desc->read.hdr_addr = cpu_to_le64(bi->dma); i++; if (i == rx_ring->count) i = 0; } + if (rx_ring->next_to_use != i) + i40e_release_rx_desc(rx_ring, i); + + return false; + no_buffers: if (rx_ring->next_to_use != i) i40e_release_rx_desc(rx_ring, i); + + /* make sure to come back via polling to try again after + * allocation failure + */ + return true; } /** * i40e_alloc_rx_buffers_1buf - Replace used receive buffers; single buffer * @rx_ring: ring to place buffers on * @cleaned_count: number of buffers to replace + * + * Returns true if any errors on allocation **/ -void i40e_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count) +bool i40e_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count) { u16 i = rx_ring->next_to_use; union i40e_rx_desc *rx_desc; @@ -1251,7 +1300,7 @@ void i40e_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count) /* do nothing if no valid netdev defined */ if (!rx_ring->netdev || !cleaned_count) - return; + return false; while (cleaned_count--) { rx_desc = I40E_RX_DESC(rx_ring, i); @@ -1259,8 +1308,10 @@ void i40e_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count) skb = bi->skb; if (!skb) { - skb = netdev_alloc_skb_ip_align(rx_ring->netdev, - rx_ring->rx_buf_len); + skb = __netdev_alloc_skb_ip_align(rx_ring->netdev, + rx_ring->rx_buf_len, + GFP_ATOMIC | + __GFP_NOWARN); if (!skb) { rx_ring->rx_stats.alloc_buff_failed++; goto no_buffers; @@ -1278,6 +1329,8 @@ void i40e_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count) if (dma_mapping_error(rx_ring->dev, bi->dma)) { rx_ring->rx_stats.alloc_buff_failed++; bi->dma = 0; + dev_kfree_skb(bi->skb); + bi->skb = NULL; goto no_buffers; } } @@ -1289,9 +1342,19 @@ void i40e_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count) i = 0; } + if (rx_ring->next_to_use != i) + i40e_release_rx_desc(rx_ring, i); + + return false; + no_buffers: if (rx_ring->next_to_use != i) i40e_release_rx_desc(rx_ring, i); + + /* make sure to come back via polling to try again after + * allocation failure + */ + return true; } /** @@ -1326,16 +1389,7 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi, u16 rx_ptype) { struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(rx_ptype); - bool ipv4 = false, ipv6 = false; - bool ipv4_tunnel, ipv6_tunnel; - __wsum rx_udp_csum; - struct iphdr *iph; - __sum16 csum; - - ipv4_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT4_MAC_PAY3) && - (rx_ptype <= I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4); - ipv6_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT6_MAC_PAY3) && - (rx_ptype <= I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4); + bool ipv4, ipv6, ipv4_tunnel, ipv6_tunnel; skb->ip_summed = CHECKSUM_NONE; @@ -1351,12 +1405,10 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi, if (!(decoded.known && decoded.outer_ip)) return; - if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP && - decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) - ipv4 = true; - else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP && - decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) - ipv6 = true; + ipv4 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) && + (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4); + ipv6 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) && + (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6); if (ipv4 && (rx_error & (BIT(I40E_RX_DESC_ERROR_IPE_SHIFT) | @@ -1380,37 +1432,17 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi, if (rx_error & BIT(I40E_RX_DESC_ERROR_PPRS_SHIFT)) return; - /* If VXLAN/GENEVE traffic has an outer UDPv4 checksum we need to check - * it in the driver, hardware does not do it for us. - * Since L3L4P bit was set we assume a valid IHL value (>=5) - * so the total length of IPv4 header is IHL*4 bytes - * The UDP_0 bit *may* bet set if the *inner* header is UDP + /* The hardware supported by this driver does not validate outer + * checksums for tunneled VXLAN or GENEVE frames. I don't agree + * with it but the specification states that you "MAY validate", it + * doesn't make it a hard requirement so if we have validated the + * inner checksum report CHECKSUM_UNNECESSARY. */ - if (!(vsi->back->flags & I40E_FLAG_OUTER_UDP_CSUM_CAPABLE) && - (ipv4_tunnel)) { - skb->transport_header = skb->mac_header + - sizeof(struct ethhdr) + - (ip_hdr(skb)->ihl * 4); - - /* Add 4 bytes for VLAN tagged packets */ - skb->transport_header += (skb->protocol == htons(ETH_P_8021Q) || - skb->protocol == htons(ETH_P_8021AD)) - ? VLAN_HLEN : 0; - - if ((ip_hdr(skb)->protocol == IPPROTO_UDP) && - (udp_hdr(skb)->check != 0)) { - rx_udp_csum = udp_csum(skb); - iph = ip_hdr(skb); - csum = csum_tcpudp_magic( - iph->saddr, iph->daddr, - (skb->len - skb_transport_offset(skb)), - IPPROTO_UDP, rx_udp_csum); - - if (udp_hdr(skb)->check != csum) - goto checksum_fail; - - } /* else its GRE and so no outer UDP header */ - } + + ipv4_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT4_MAC_PAY3) && + (rx_ptype <= I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4); + ipv6_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT6_MAC_PAY3) && + (rx_ptype <= I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4); skb->ip_summed = CHECKSUM_UNNECESSARY; skb->csum_level = ipv4_tunnel || ipv6_tunnel; @@ -1475,18 +1507,19 @@ static inline void i40e_rx_hash(struct i40e_ring *ring, * * Returns true if there's any budget left (e.g. the clean is finished) **/ -static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) +static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, const int budget) { unsigned int total_rx_bytes = 0, total_rx_packets = 0; u16 rx_packet_len, rx_header_len, rx_sph, rx_hbo; u16 cleaned_count = I40E_DESC_UNUSED(rx_ring); - const int current_node = numa_mem_id(); struct i40e_vsi *vsi = rx_ring->vsi; u16 i = rx_ring->next_to_clean; union i40e_rx_desc *rx_desc; u32 rx_error, rx_status; + bool failure = false; u8 rx_ptype; u64 qword; + u32 copysize; if (budget <= 0) return 0; @@ -1497,7 +1530,9 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) u16 vlan_tag; /* return some buffers to hardware, one at a time is too slow */ if (cleaned_count >= I40E_RX_BUFFER_WRITE) { - i40e_alloc_rx_buffers_ps(rx_ring, cleaned_count); + failure = failure || + i40e_alloc_rx_buffers_ps(rx_ring, + cleaned_count); cleaned_count = 0; } @@ -1515,6 +1550,12 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) * DD bit is set. */ dma_rmb(); + /* sync header buffer for reading */ + dma_sync_single_range_for_cpu(rx_ring->dev, + rx_ring->rx_bi[0].dma, + i * rx_ring->rx_hdr_len, + rx_ring->rx_hdr_len, + DMA_FROM_DEVICE); if (i40e_rx_is_programming_status(qword)) { i40e_clean_programming_status(rx_ring, rx_desc); I40E_RX_INCREMENT(rx_ring, i); @@ -1523,10 +1564,13 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) rx_bi = &rx_ring->rx_bi[i]; skb = rx_bi->skb; if (likely(!skb)) { - skb = netdev_alloc_skb_ip_align(rx_ring->netdev, - rx_ring->rx_hdr_len); + skb = __netdev_alloc_skb_ip_align(rx_ring->netdev, + rx_ring->rx_hdr_len, + GFP_ATOMIC | + __GFP_NOWARN); if (!skb) { rx_ring->rx_stats.alloc_buff_failed++; + failure = true; break; } @@ -1534,8 +1578,8 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) skb_record_rx_queue(skb, rx_ring->queue_index); /* we are reusing so sync this buffer for CPU use */ dma_sync_single_range_for_cpu(rx_ring->dev, - rx_bi->dma, - 0, + rx_ring->rx_bi[0].dma, + i * rx_ring->rx_hdr_len, rx_ring->rx_hdr_len, DMA_FROM_DEVICE); } @@ -1553,9 +1597,16 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >> I40E_RXD_QW1_PTYPE_SHIFT; - prefetch(rx_bi->page); + /* sync half-page for reading */ + dma_sync_single_range_for_cpu(rx_ring->dev, + rx_bi->page_dma, + rx_bi->page_offset, + PAGE_SIZE / 2, + DMA_FROM_DEVICE); + prefetch(page_address(rx_bi->page) + rx_bi->page_offset); rx_bi->skb = NULL; cleaned_count++; + copysize = 0; if (rx_hbo || rx_sph) { int len; @@ -1566,38 +1617,50 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) memcpy(__skb_put(skb, len), rx_bi->hdr_buf, len); } else if (skb->len == 0) { int len; + unsigned char *va = page_address(rx_bi->page) + + rx_bi->page_offset; - len = (rx_packet_len > skb_headlen(skb) ? - skb_headlen(skb) : rx_packet_len); - memcpy(__skb_put(skb, len), - rx_bi->page + rx_bi->page_offset, - len); - rx_bi->page_offset += len; + len = min(rx_packet_len, rx_ring->rx_hdr_len); + memcpy(__skb_put(skb, len), va, len); + copysize = len; rx_packet_len -= len; } - /* Get the rest of the data if this was a header split */ if (rx_packet_len) { - skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, - rx_bi->page, - rx_bi->page_offset, - rx_packet_len); - - skb->len += rx_packet_len; - skb->data_len += rx_packet_len; - skb->truesize += rx_packet_len; - - if ((page_count(rx_bi->page) == 1) && - (page_to_nid(rx_bi->page) == current_node)) - get_page(rx_bi->page); - else + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, + rx_bi->page, + rx_bi->page_offset + copysize, + rx_packet_len, I40E_RXBUFFER_2048); + + /* If the page count is more than 2, then both halves + * of the page are used and we need to free it. Do it + * here instead of in the alloc code. Otherwise one + * of the half-pages might be released between now and + * then, and we wouldn't know which one to use. + * Don't call get_page and free_page since those are + * both expensive atomic operations that just change + * the refcount in opposite directions. Just give the + * page to the stack; he can have our refcount. + */ + if (page_count(rx_bi->page) > 2) { + dma_unmap_page(rx_ring->dev, + rx_bi->page_dma, + PAGE_SIZE, + DMA_FROM_DEVICE); rx_bi->page = NULL; + rx_bi->page_dma = 0; + rx_ring->rx_stats.realloc_count++; + } else { + get_page(rx_bi->page); + /* switch to the other half-page here; the + * allocation code programs the right addr + * into HW. If we haven't used this half-page, + * the address won't be changed, and HW can + * just use it next time through. + */ + rx_bi->page_offset ^= PAGE_SIZE / 2; + } - dma_unmap_page(rx_ring->dev, - rx_bi->page_dma, - PAGE_SIZE / 2, - DMA_FROM_DEVICE); - rx_bi->page_dma = 0; } I40E_RX_INCREMENT(rx_ring, i); @@ -1656,7 +1719,7 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) rx_ring->q_vector->rx.total_packets += total_rx_packets; rx_ring->q_vector->rx.total_bytes += total_rx_bytes; - return total_rx_packets; + return failure ? budget : total_rx_packets; } /** @@ -1674,6 +1737,7 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget) union i40e_rx_desc *rx_desc; u32 rx_error, rx_status; u16 rx_packet_len; + bool failure = false; u8 rx_ptype; u64 qword; u16 i; @@ -1684,7 +1748,9 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget) u16 vlan_tag; /* return some buffers to hardware, one at a time is too slow */ if (cleaned_count >= I40E_RX_BUFFER_WRITE) { - i40e_alloc_rx_buffers_1buf(rx_ring, cleaned_count); + failure = failure || + i40e_alloc_rx_buffers_1buf(rx_ring, + cleaned_count); cleaned_count = 0; } @@ -1783,7 +1849,7 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget) rx_ring->q_vector->rx.total_packets += total_rx_packets; rx_ring->q_vector->rx.total_bytes += total_rx_bytes; - return total_rx_packets; + return failure ? budget : total_rx_packets; } static u32 i40e_buildreg_itr(const int type, const u16 itr) @@ -1791,7 +1857,9 @@ static u32 i40e_buildreg_itr(const int type, const u16 itr) u32 val; val = I40E_PFINT_DYN_CTLN_INTENA_MASK | - I40E_PFINT_DYN_CTLN_CLEARPBA_MASK | + /* Don't clear PBA because that can cause lost interrupts that + * came in while we were cleaning/polling + */ (type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) | (itr << I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT); @@ -1814,6 +1882,7 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, bool rx = false, tx = false; u32 rxval, txval; int vector; + int idx = q_vector->v_idx; vector = (q_vector->v_idx + vsi->base_vector); @@ -1823,17 +1892,17 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0); if (q_vector->itr_countdown > 0 || - (!ITR_IS_DYNAMIC(vsi->rx_itr_setting) && - !ITR_IS_DYNAMIC(vsi->tx_itr_setting))) { + (!ITR_IS_DYNAMIC(vsi->rx_rings[idx]->rx_itr_setting) && + !ITR_IS_DYNAMIC(vsi->tx_rings[idx]->tx_itr_setting))) { goto enable_int; } - if (ITR_IS_DYNAMIC(vsi->rx_itr_setting)) { + if (ITR_IS_DYNAMIC(vsi->rx_rings[idx]->rx_itr_setting)) { rx = i40e_set_new_dynamic_itr(&q_vector->rx); rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr); } - if (ITR_IS_DYNAMIC(vsi->tx_itr_setting)) { + if (ITR_IS_DYNAMIC(vsi->tx_rings[idx]->tx_itr_setting)) { tx = i40e_set_new_dynamic_itr(&q_vector->tx); txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr); } @@ -1906,7 +1975,8 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) * budget and be more aggressive about cleaning up the Tx descriptors. */ i40e_for_each_ring(ring, q_vector->tx) { - clean_complete &= i40e_clean_tx_irq(ring, vsi->work_limit); + clean_complete = clean_complete && + i40e_clean_tx_irq(ring, vsi->work_limit); arm_wb = arm_wb || ring->arm_wb; ring->arm_wb = false; } @@ -1930,7 +2000,7 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) work_done += cleaned; /* if we didn't clean as many as budgeted, we must be done */ - clean_complete &= (budget_per_ring != cleaned); + clean_complete = clean_complete && (budget_per_ring > cleaned); } /* If work not completed, return budget and polling will return */ @@ -1938,7 +2008,7 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) tx_only: if (arm_wb) { q_vector->tx.ring[0].tx_stats.tx_force_wb++; - i40e_force_wb(vsi, q_vector); + i40e_enable_wb_on_itr(vsi, q_vector); } return budget; } @@ -1951,20 +2021,7 @@ tx_only: if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) { i40e_update_enable_itr(vsi, q_vector); } else { /* Legacy mode */ - struct i40e_hw *hw = &vsi->back->hw; - /* We re-enable the queue 0 cause, but - * don't worry about dynamic_enable - * because we left it on for the other - * possible interrupts during napi - */ - u32 qval = rd32(hw, I40E_QINT_RQCTL(0)) | - I40E_QINT_RQCTL_CAUSE_ENA_MASK; - - wr32(hw, I40E_QINT_RQCTL(0), qval); - qval = rd32(hw, I40E_QINT_TQCTL(0)) | - I40E_QINT_TQCTL_CAUSE_ENA_MASK; - wr32(hw, I40E_QINT_TQCTL(0), qval); - i40e_irq_dynamic_enable_icr0(vsi->back); + i40e_irq_dynamic_enable_icr0(vsi->back, false); } return 0; } @@ -1974,10 +2031,9 @@ tx_only: * @tx_ring: ring to add programming descriptor to * @skb: send buffer * @tx_flags: send tx flags - * @protocol: wire protocol **/ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, - u32 tx_flags, __be16 protocol) + u32 tx_flags) { struct i40e_filter_program_desc *fdir_desc; struct i40e_pf *pf = tx_ring->vsi->back; @@ -1989,6 +2045,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, struct tcphdr *th; unsigned int hlen; u32 flex_ptype, dtype_cmd; + int l4_proto; u16 i; /* make sure ATR is enabled */ @@ -2002,36 +2059,28 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, if (!tx_ring->atr_sample_rate) return; + /* Currently only IPv4/IPv6 with TCP is supported */ if (!(tx_flags & (I40E_TX_FLAGS_IPV4 | I40E_TX_FLAGS_IPV6))) return; - if (!(tx_flags & I40E_TX_FLAGS_UDP_TUNNEL)) { - /* snag network header to get L4 type and address */ - hdr.network = skb_network_header(skb); + /* snag network header to get L4 type and address */ + hdr.network = (tx_flags & I40E_TX_FLAGS_UDP_TUNNEL) ? + skb_inner_network_header(skb) : skb_network_header(skb); - /* Currently only IPv4/IPv6 with TCP is supported - * access ihl as u8 to avoid unaligned access on ia64 - */ - if (tx_flags & I40E_TX_FLAGS_IPV4) - hlen = (hdr.network[0] & 0x0F) << 2; - else if (protocol == htons(ETH_P_IPV6)) - hlen = sizeof(struct ipv6hdr); - else - return; + /* Note: tx_flags gets modified to reflect inner protocols in + * tx_enable_csum function if encap is enabled. + */ + if (tx_flags & I40E_TX_FLAGS_IPV4) { + /* access ihl as u8 to avoid unaligned access on ia64 */ + hlen = (hdr.network[0] & 0x0F) << 2; + l4_proto = hdr.ipv4->protocol; } else { - hdr.network = skb_inner_network_header(skb); - hlen = skb_inner_network_header_len(skb); + hlen = hdr.network - skb->data; + l4_proto = ipv6_find_hdr(skb, &hlen, IPPROTO_TCP, NULL, NULL); + hlen -= hdr.network - skb->data; } - /* Currently only IPv4/IPv6 with TCP is supported - * Note: tx_flags gets modified to reflect inner protocols in - * tx_enable_csum function if encap is enabled. - */ - if ((tx_flags & I40E_TX_FLAGS_IPV4) && - (hdr.ipv4->protocol != IPPROTO_TCP)) - return; - else if ((tx_flags & I40E_TX_FLAGS_IPV6) && - (hdr.ipv6->nexthdr != IPPROTO_TCP)) + if (l4_proto != IPPROTO_TCP) return; th = (struct tcphdr *)(hdr.network + hlen); @@ -2039,7 +2088,8 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, /* Due to lack of space, no more new filters can be programmed */ if (th->syn && (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED)) return; - if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) { + if ((pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) && + (!(pf->auto_disable_flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE))) { /* HW ATR eviction will take care of removing filters on FIN * and RST packets. */ @@ -2067,7 +2117,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, flex_ptype = (tx_ring->queue_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) & I40E_TXD_FLTR_QW0_QINDEX_MASK; - flex_ptype |= (protocol == htons(ETH_P_IP)) ? + flex_ptype |= (tx_flags & I40E_TX_FLAGS_IPV4) ? (I40E_FILTER_PCTYPE_NONF_IPV4_TCP << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) : (I40E_FILTER_PCTYPE_NONF_IPV6_TCP << @@ -2101,7 +2151,8 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) & I40E_TXD_FLTR_QW1_CNTINDEX_MASK; - if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) + if ((pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) && + (!(pf->auto_disable_flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE))) dtype_cmd |= I40E_TXD_FLTR_QW1_ATR_MASK; fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype); @@ -2206,13 +2257,23 @@ out: static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb, u8 *hdr_len, u64 *cd_type_cmd_tso_mss) { - u32 cd_cmd, cd_tso_len, cd_mss; - struct ipv6hdr *ipv6h; - struct tcphdr *tcph; - struct iphdr *iph; - u32 l4len; + u64 cd_cmd, cd_tso_len, cd_mss; + union { + struct iphdr *v4; + struct ipv6hdr *v6; + unsigned char *hdr; + } ip; + union { + struct tcphdr *tcp; + struct udphdr *udp; + unsigned char *hdr; + } l4; + u32 paylen, l4_offset; int err; + if (skb->ip_summed != CHECKSUM_PARTIAL) + return 0; + if (!skb_is_gso(skb)) return 0; @@ -2220,35 +2281,60 @@ static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb, if (err < 0) return err; - iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb); - ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb); - - if (iph->version == 4) { - tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb); - iph->tot_len = 0; - iph->check = 0; - tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, - 0, IPPROTO_TCP, 0); - } else if (ipv6h->version == 6) { - tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb); - ipv6h->payload_len = 0; - tcph->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, - 0, IPPROTO_TCP, 0); + ip.hdr = skb_network_header(skb); + l4.hdr = skb_transport_header(skb); + + /* initialize outer IP header fields */ + if (ip.v4->version == 4) { + ip.v4->tot_len = 0; + ip.v4->check = 0; + } else { + ip.v6->payload_len = 0; } - l4len = skb->encapsulation ? inner_tcp_hdrlen(skb) : tcp_hdrlen(skb); - *hdr_len = (skb->encapsulation - ? (skb_inner_transport_header(skb) - skb->data) - : skb_transport_offset(skb)) + l4len; + if (skb_shinfo(skb)->gso_type & (SKB_GSO_UDP_TUNNEL | SKB_GSO_GRE | + SKB_GSO_UDP_TUNNEL_CSUM)) { + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM) { + /* determine offset of outer transport header */ + l4_offset = l4.hdr - skb->data; + + /* remove payload length from outer checksum */ + paylen = (__force u16)l4.udp->check; + paylen += ntohs(1) * (u16)~(skb->len - l4_offset); + l4.udp->check = ~csum_fold((__force __wsum)paylen); + } + + /* reset pointers to inner headers */ + ip.hdr = skb_inner_network_header(skb); + l4.hdr = skb_inner_transport_header(skb); + + /* initialize inner IP header fields */ + if (ip.v4->version == 4) { + ip.v4->tot_len = 0; + ip.v4->check = 0; + } else { + ip.v6->payload_len = 0; + } + } + + /* determine offset of inner transport header */ + l4_offset = l4.hdr - skb->data; + + /* remove payload length from inner checksum */ + paylen = (__force u16)l4.tcp->check; + paylen += ntohs(1) * (u16)~(skb->len - l4_offset); + l4.tcp->check = ~csum_fold((__force __wsum)paylen); + + /* compute length of segmentation header */ + *hdr_len = (l4.tcp->doff * 4) + l4_offset; /* find the field values */ cd_cmd = I40E_TX_CTX_DESC_TSO; cd_tso_len = skb->len - *hdr_len; cd_mss = skb_shinfo(skb)->gso_size; - *cd_type_cmd_tso_mss |= ((u64)cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) | - ((u64)cd_tso_len << - I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) | - ((u64)cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT); + *cd_type_cmd_tso_mss |= (cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) | + (cd_tso_len << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) | + (cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT); return 1; } @@ -2303,129 +2389,154 @@ static int i40e_tsyn(struct i40e_ring *tx_ring, struct sk_buff *skb, * @tx_ring: Tx descriptor ring * @cd_tunneling: ptr to context desc bits **/ -static void i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags, - u32 *td_cmd, u32 *td_offset, - struct i40e_ring *tx_ring, - u32 *cd_tunneling) +static int i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags, + u32 *td_cmd, u32 *td_offset, + struct i40e_ring *tx_ring, + u32 *cd_tunneling) { - struct ipv6hdr *this_ipv6_hdr; - unsigned int this_tcp_hdrlen; - struct iphdr *this_ip_hdr; - u32 network_hdr_len; - u8 l4_hdr = 0; - struct udphdr *oudph = NULL; - struct iphdr *oiph = NULL; - u32 l4_tunnel = 0; + union { + struct iphdr *v4; + struct ipv6hdr *v6; + unsigned char *hdr; + } ip; + union { + struct tcphdr *tcp; + struct udphdr *udp; + unsigned char *hdr; + } l4; + unsigned char *exthdr; + u32 offset, cmd = 0, tunnel = 0; + __be16 frag_off; + u8 l4_proto = 0; + + if (skb->ip_summed != CHECKSUM_PARTIAL) + return 0; + + ip.hdr = skb_network_header(skb); + l4.hdr = skb_transport_header(skb); + + /* compute outer L2 header size */ + offset = ((ip.hdr - skb->data) / 2) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT; if (skb->encapsulation) { - switch (ip_hdr(skb)->protocol) { + /* define outer network header type */ + if (*tx_flags & I40E_TX_FLAGS_IPV4) { + tunnel |= (*tx_flags & I40E_TX_FLAGS_TSO) ? + I40E_TX_CTX_EXT_IP_IPV4 : + I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM; + + l4_proto = ip.v4->protocol; + } else if (*tx_flags & I40E_TX_FLAGS_IPV6) { + tunnel |= I40E_TX_CTX_EXT_IP_IPV6; + + exthdr = ip.hdr + sizeof(*ip.v6); + l4_proto = ip.v6->nexthdr; + if (l4.hdr != exthdr) + ipv6_skip_exthdr(skb, exthdr - skb->data, + &l4_proto, &frag_off); + } + + /* compute outer L3 header size */ + tunnel |= ((l4.hdr - ip.hdr) / 4) << + I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT; + + /* switch IP header pointer from outer to inner header */ + ip.hdr = skb_inner_network_header(skb); + + /* define outer transport */ + switch (l4_proto) { case IPPROTO_UDP: - oudph = udp_hdr(skb); - oiph = ip_hdr(skb); - l4_tunnel = I40E_TXD_CTX_UDP_TUNNELING; + tunnel |= I40E_TXD_CTX_UDP_TUNNELING; *tx_flags |= I40E_TX_FLAGS_UDP_TUNNEL; break; case IPPROTO_GRE: - l4_tunnel = I40E_TXD_CTX_GRE_TUNNELING; + tunnel |= I40E_TXD_CTX_GRE_TUNNELING; + *tx_flags |= I40E_TX_FLAGS_UDP_TUNNEL; break; default: - return; - } - network_hdr_len = skb_inner_network_header_len(skb); - this_ip_hdr = inner_ip_hdr(skb); - this_ipv6_hdr = inner_ipv6_hdr(skb); - this_tcp_hdrlen = inner_tcp_hdrlen(skb); - - if (*tx_flags & I40E_TX_FLAGS_IPV4) { - if (*tx_flags & I40E_TX_FLAGS_TSO) { - *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4; - ip_hdr(skb)->check = 0; - } else { - *cd_tunneling |= - I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM; - } - } else if (*tx_flags & I40E_TX_FLAGS_IPV6) { - *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6; if (*tx_flags & I40E_TX_FLAGS_TSO) - ip_hdr(skb)->check = 0; + return -1; + + skb_checksum_help(skb); + return 0; } - /* Now set the ctx descriptor fields */ - *cd_tunneling |= (skb_network_header_len(skb) >> 2) << - I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT | - l4_tunnel | - ((skb_inner_network_offset(skb) - - skb_transport_offset(skb)) >> 1) << - I40E_TXD_CTX_QW0_NATLEN_SHIFT; - if (this_ip_hdr->version == 6) { - *tx_flags &= ~I40E_TX_FLAGS_IPV4; + /* compute tunnel header size */ + tunnel |= ((ip.hdr - l4.hdr) / 2) << + I40E_TXD_CTX_QW0_NATLEN_SHIFT; + + /* indicate if we need to offload outer UDP header */ + if ((*tx_flags & I40E_TX_FLAGS_TSO) && + (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM)) + tunnel |= I40E_TXD_CTX_QW0_L4T_CS_MASK; + + /* record tunnel offload values */ + *cd_tunneling |= tunnel; + + /* switch L4 header pointer from outer to inner */ + l4.hdr = skb_inner_transport_header(skb); + l4_proto = 0; + + /* reset type as we transition from outer to inner headers */ + *tx_flags &= ~(I40E_TX_FLAGS_IPV4 | I40E_TX_FLAGS_IPV6); + if (ip.v4->version == 4) + *tx_flags |= I40E_TX_FLAGS_IPV4; + if (ip.v6->version == 6) *tx_flags |= I40E_TX_FLAGS_IPV6; - } - if ((tx_ring->flags & I40E_TXR_FLAGS_OUTER_UDP_CSUM) && - (l4_tunnel == I40E_TXD_CTX_UDP_TUNNELING) && - (*cd_tunneling & I40E_TXD_CTX_QW0_EXT_IP_MASK)) { - oudph->check = ~csum_tcpudp_magic(oiph->saddr, - oiph->daddr, - (skb->len - skb_transport_offset(skb)), - IPPROTO_UDP, 0); - *cd_tunneling |= I40E_TXD_CTX_QW0_L4T_CS_MASK; - } - } else { - network_hdr_len = skb_network_header_len(skb); - this_ip_hdr = ip_hdr(skb); - this_ipv6_hdr = ipv6_hdr(skb); - this_tcp_hdrlen = tcp_hdrlen(skb); } /* Enable IP checksum offloads */ if (*tx_flags & I40E_TX_FLAGS_IPV4) { - l4_hdr = this_ip_hdr->protocol; + l4_proto = ip.v4->protocol; /* the stack computes the IP header already, the only time we * need the hardware to recompute it is in the case of TSO. */ - if (*tx_flags & I40E_TX_FLAGS_TSO) { - *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM; - this_ip_hdr->check = 0; - } else { - *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4; - } - /* Now set the td_offset for IP header length */ - *td_offset = (network_hdr_len >> 2) << - I40E_TX_DESC_LENGTH_IPLEN_SHIFT; + cmd |= (*tx_flags & I40E_TX_FLAGS_TSO) ? + I40E_TX_DESC_CMD_IIPT_IPV4_CSUM : + I40E_TX_DESC_CMD_IIPT_IPV4; } else if (*tx_flags & I40E_TX_FLAGS_IPV6) { - l4_hdr = this_ipv6_hdr->nexthdr; - *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV6; - /* Now set the td_offset for IP header length */ - *td_offset = (network_hdr_len >> 2) << - I40E_TX_DESC_LENGTH_IPLEN_SHIFT; + cmd |= I40E_TX_DESC_CMD_IIPT_IPV6; + + exthdr = ip.hdr + sizeof(*ip.v6); + l4_proto = ip.v6->nexthdr; + if (l4.hdr != exthdr) + ipv6_skip_exthdr(skb, exthdr - skb->data, + &l4_proto, &frag_off); } - /* words in MACLEN + dwords in IPLEN + dwords in L4Len */ - *td_offset |= (skb_network_offset(skb) >> 1) << - I40E_TX_DESC_LENGTH_MACLEN_SHIFT; + + /* compute inner L3 header size */ + offset |= ((l4.hdr - ip.hdr) / 4) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT; /* Enable L4 checksum offloads */ - switch (l4_hdr) { + switch (l4_proto) { case IPPROTO_TCP: /* enable checksum offloads */ - *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP; - *td_offset |= (this_tcp_hdrlen >> 2) << - I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; + cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP; + offset |= l4.tcp->doff << I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; break; case IPPROTO_SCTP: /* enable SCTP checksum offload */ - *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP; - *td_offset |= (sizeof(struct sctphdr) >> 2) << - I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; + cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP; + offset |= (sizeof(struct sctphdr) >> 2) << + I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; break; case IPPROTO_UDP: /* enable UDP checksum offload */ - *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP; - *td_offset |= (sizeof(struct udphdr) >> 2) << - I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; + cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP; + offset |= (sizeof(struct udphdr) >> 2) << + I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; break; default: - break; + if (*tx_flags & I40E_TX_FLAGS_TSO) + return -1; + skb_checksum_help(skb); + return 0; } + + *td_cmd |= cmd; + *td_offset |= offset; + + return 1; } /** @@ -2466,7 +2577,7 @@ static void i40e_create_tx_ctx(struct i40e_ring *tx_ring, * * Returns -EBUSY if a stop is needed, else 0 **/ -static inline int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size) +int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size) { netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index); /* Memory barrier before checking head and tail */ @@ -2483,77 +2594,70 @@ static inline int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size) } /** - * i40e_maybe_stop_tx - 1st level check for tx stop conditions - * @tx_ring: the ring to be checked - * @size: the size buffer we want to assure is available - * - * Returns 0 if stop is not needed - **/ -#ifdef I40E_FCOE -inline int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size) -#else -static inline int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size) -#endif -{ - if (likely(I40E_DESC_UNUSED(tx_ring) >= size)) - return 0; - return __i40e_maybe_stop_tx(tx_ring, size); -} - -/** - * i40e_chk_linearize - Check if there are more than 8 fragments per packet + * __i40e_chk_linearize - Check if there are more than 8 buffers per packet * @skb: send buffer - * @tx_flags: collected send information * - * Note: Our HW can't scatter-gather more than 8 fragments to build - * a packet on the wire and so we need to figure out the cases where we - * need to linearize the skb. + * Note: Our HW can't DMA more than 8 buffers to build a packet on the wire + * and so we need to figure out the cases where we need to linearize the skb. + * + * For TSO we need to count the TSO header and segment payload separately. + * As such we need to check cases where we have 7 fragments or more as we + * can potentially require 9 DMA transactions, 1 for the TSO header, 1 for + * the segment payload in the first descriptor, and another 7 for the + * fragments. **/ -static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags) +bool __i40e_chk_linearize(struct sk_buff *skb) { - struct skb_frag_struct *frag; - bool linearize = false; - unsigned int size = 0; - u16 num_frags; - u16 gso_segs; + const struct skb_frag_struct *frag, *stale; + int nr_frags, sum; - num_frags = skb_shinfo(skb)->nr_frags; - gso_segs = skb_shinfo(skb)->gso_segs; + /* no need to check if number of frags is less than 7 */ + nr_frags = skb_shinfo(skb)->nr_frags; + if (nr_frags < (I40E_MAX_BUFFER_TXD - 1)) + return false; - if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO)) { - u16 j = 0; + /* We need to walk through the list and validate that each group + * of 6 fragments totals at least gso_size. However we don't need + * to perform such validation on the last 6 since the last 6 cannot + * inherit any data from a descriptor after them. + */ + nr_frags -= I40E_MAX_BUFFER_TXD - 2; + frag = &skb_shinfo(skb)->frags[0]; + + /* Initialize size to the negative value of gso_size minus 1. We + * use this as the worst case scenerio in which the frag ahead + * of us only provides one byte which is why we are limited to 6 + * descriptors for a single transmit as the header and previous + * fragment are already consuming 2 descriptors. + */ + sum = 1 - skb_shinfo(skb)->gso_size; - if (num_frags < (I40E_MAX_BUFFER_TXD)) - goto linearize_chk_done; - /* try the simple math, if we have too many frags per segment */ - if (DIV_ROUND_UP((num_frags + gso_segs), gso_segs) > - I40E_MAX_BUFFER_TXD) { - linearize = true; - goto linearize_chk_done; - } - frag = &skb_shinfo(skb)->frags[0]; - /* we might still have more fragments per segment */ - do { - size += skb_frag_size(frag); - frag++; j++; - if ((size >= skb_shinfo(skb)->gso_size) && - (j < I40E_MAX_BUFFER_TXD)) { - size = (size % skb_shinfo(skb)->gso_size); - j = (size) ? 1 : 0; - } - if (j == I40E_MAX_BUFFER_TXD) { - linearize = true; - break; - } - num_frags--; - } while (num_frags); - } else { - if (num_frags >= I40E_MAX_BUFFER_TXD) - linearize = true; + /* Add size of frags 0 through 4 to create our initial sum */ + sum += skb_frag_size(frag++); + sum += skb_frag_size(frag++); + sum += skb_frag_size(frag++); + sum += skb_frag_size(frag++); + sum += skb_frag_size(frag++); + + /* Walk through fragments adding latest fragment, testing it, and + * then removing stale fragments from the sum. + */ + stale = &skb_shinfo(skb)->frags[0]; + for (;;) { + sum += skb_frag_size(frag++); + + /* if sum is negative we failed to make sufficient progress */ + if (sum < 0) + return true; + + /* use pre-decrement to avoid processing last fragment */ + if (!--nr_frags) + break; + + sum -= skb_frag_size(stale++); } -linearize_chk_done: - return linearize; + return false; } /** @@ -2759,43 +2863,6 @@ dma_error: tx_ring->next_to_use = i; } -/** - * i40e_xmit_descriptor_count - calculate number of tx descriptors needed - * @skb: send buffer - * @tx_ring: ring to send buffer on - * - * Returns number of data descriptors needed for this skb. Returns 0 to indicate - * there is not enough descriptors available in this ring since we need at least - * one descriptor. - **/ -#ifdef I40E_FCOE -inline int i40e_xmit_descriptor_count(struct sk_buff *skb, - struct i40e_ring *tx_ring) -#else -static inline int i40e_xmit_descriptor_count(struct sk_buff *skb, - struct i40e_ring *tx_ring) -#endif -{ - unsigned int f; - int count = 0; - - /* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD, - * + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD, - * + 4 desc gap to avoid the cache line where head is, - * + 1 desc for context descriptor, - * otherwise try next time - */ - for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) - count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size); - - count += TXD_USE_COUNT(skb_headlen(skb)); - if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) { - tx_ring->tx_stats.tx_busy++; - return 0; - } - return count; -} - /** * i40e_xmit_frame_ring - Sends buffer on Tx ring * @skb: send buffer @@ -2814,14 +2881,30 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, __be16 protocol; u32 td_cmd = 0; u8 hdr_len = 0; + int tso, count; int tsyn; - int tso; /* prefetch the data, we'll need it later */ prefetch(skb->data); - if (0 == i40e_xmit_descriptor_count(skb, tx_ring)) + count = i40e_xmit_descriptor_count(skb); + if (i40e_chk_linearize(skb, count)) { + if (__skb_linearize(skb)) + goto out_drop; + count = TXD_USE_COUNT(skb->len); + tx_ring->tx_stats.tx_linearize++; + } + + /* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD, + * + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD, + * + 4 desc gap to avoid the cache line where head is, + * + 1 desc for context descriptor, + * otherwise try next time + */ + if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) { + tx_ring->tx_stats.tx_busy++; return NETDEV_TX_BUSY; + } /* prepare the xmit flags */ if (i40e_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags)) @@ -2846,29 +2929,22 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, else if (tso) tx_flags |= I40E_TX_FLAGS_TSO; + /* Always offload the checksum, since it's in the data descriptor */ + tso = i40e_tx_enable_csum(skb, &tx_flags, &td_cmd, &td_offset, + tx_ring, &cd_tunneling); + if (tso < 0) + goto out_drop; + tsyn = i40e_tsyn(tx_ring, skb, tx_flags, &cd_type_cmd_tso_mss); if (tsyn) tx_flags |= I40E_TX_FLAGS_TSYN; - if (i40e_chk_linearize(skb, tx_flags)) { - if (skb_linearize(skb)) - goto out_drop; - tx_ring->tx_stats.tx_linearize++; - } skb_tx_timestamp(skb); /* always enable CRC insertion offload */ td_cmd |= I40E_TX_DESC_CMD_ICRC; - /* Always offload the checksum, since it's in the data descriptor */ - if (skb->ip_summed == CHECKSUM_PARTIAL) { - tx_flags |= I40E_TX_FLAGS_CSUM; - - i40e_tx_enable_csum(skb, &tx_flags, &td_cmd, &td_offset, - tx_ring, &cd_tunneling); - } - i40e_create_tx_ctx(tx_ring, cd_type_cmd_tso_mss, cd_tunneling, cd_l2tag2); @@ -2876,7 +2952,7 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, * * NOTE: this must always be directly before the data descriptor. */ - i40e_atr(tx_ring, skb, tx_flags, protocol); + i40e_atr(tx_ring, skb, tx_flags); i40e_tx_map(tx_ring, skb, first, tx_flags, hdr_len, td_cmd, td_offset); diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index 3f081e25e..a9bd70537 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2014 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -153,7 +153,6 @@ enum i40e_dyn_idx_t { #define DESC_NEEDED (MAX_SKB_FRAGS + 4) #define I40E_MIN_DESC_PENDING 4 -#define I40E_TX_FLAGS_CSUM BIT(0) #define I40E_TX_FLAGS_HW_VLAN BIT(1) #define I40E_TX_FLAGS_SW_VLAN BIT(2) #define I40E_TX_FLAGS_TSO BIT(3) @@ -203,12 +202,15 @@ struct i40e_tx_queue_stats { u64 tx_done_old; u64 tx_linearize; u64 tx_force_wb; + u64 tx_lost_interrupt; }; struct i40e_rx_queue_stats { u64 non_eop_descs; u64 alloc_page_failed; u64 alloc_buff_failed; + u64 page_reuse_count; + u64 realloc_count; }; enum i40e_ring_state_t { @@ -246,6 +248,14 @@ struct i40e_ring { u8 dcb_tc; /* Traffic class of ring */ u8 __iomem *tail; + /* high bit set means dynamic, use accessor routines to read/write. + * hardware only supports 2us resolution for the ITR registers. + * these values always store the USER setting, and must be converted + * before programming to a register. + */ + u16 rx_itr_setting; + u16 tx_itr_setting; + u16 count; /* Number of descriptors */ u16 reg_idx; /* HW register index of the ring */ u16 rx_hdr_len; @@ -254,7 +264,6 @@ struct i40e_ring { #define I40E_RX_DTYPE_NO_SPLIT 0 #define I40E_RX_DTYPE_HEADER_SPLIT 1 #define I40E_RX_DTYPE_SPLIT_ALWAYS 2 - u8 hsplit; #define I40E_RX_SPLIT_L2 0x1 #define I40E_RX_SPLIT_IP 0x2 #define I40E_RX_SPLIT_TCP_UDP 0x4 @@ -275,7 +284,6 @@ struct i40e_ring { u16 flags; #define I40E_TXR_FLAGS_WB_ON_ITR BIT(0) -#define I40E_TXR_FLAGS_OUTER_UDP_CSUM BIT(1) #define I40E_TXR_FLAGS_LAST_XMIT_MORE_SET BIT(2) /* stats structs */ @@ -316,8 +324,8 @@ struct i40e_ring_container { #define i40e_for_each_ring(pos, head) \ for (pos = (head).ring; pos != NULL; pos = pos->next) -void i40e_alloc_rx_buffers_ps(struct i40e_ring *rxr, u16 cleaned_count); -void i40e_alloc_rx_buffers_1buf(struct i40e_ring *rxr, u16 cleaned_count); +bool i40e_alloc_rx_buffers_ps(struct i40e_ring *rxr, u16 cleaned_count); +bool i40e_alloc_rx_buffers_1buf(struct i40e_ring *rxr, u16 cleaned_count); void i40e_alloc_rx_headers(struct i40e_ring *rxr); netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev); void i40e_clean_tx_ring(struct i40e_ring *tx_ring); @@ -331,13 +339,13 @@ int i40e_napi_poll(struct napi_struct *napi, int budget); void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, struct i40e_tx_buffer *first, u32 tx_flags, const u8 hdr_len, u32 td_cmd, u32 td_offset); -int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size); -int i40e_xmit_descriptor_count(struct sk_buff *skb, struct i40e_ring *tx_ring); int i40e_tx_prepare_vlan_flags(struct sk_buff *skb, struct i40e_ring *tx_ring, u32 *flags); #endif void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector); -u32 i40e_get_tx_pending(struct i40e_ring *ring); +u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw); +int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size); +bool __i40e_chk_linearize(struct sk_buff *skb); /** * i40e_get_head - Retrieve head from head writeback @@ -352,4 +360,67 @@ static inline u32 i40e_get_head(struct i40e_ring *tx_ring) return le32_to_cpu(*(volatile __le32 *)head); } + +/** + * i40e_xmit_descriptor_count - calculate number of Tx descriptors needed + * @skb: send buffer + * @tx_ring: ring to send buffer on + * + * Returns number of data descriptors needed for this skb. Returns 0 to indicate + * there is not enough descriptors available in this ring since we need at least + * one descriptor. + **/ +static inline int i40e_xmit_descriptor_count(struct sk_buff *skb) +{ + const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; + unsigned int nr_frags = skb_shinfo(skb)->nr_frags; + int count = 0, size = skb_headlen(skb); + + for (;;) { + count += TXD_USE_COUNT(size); + + if (!nr_frags--) + break; + + size = skb_frag_size(frag++); + } + + return count; +} + +/** + * i40e_maybe_stop_tx - 1st level check for Tx stop conditions + * @tx_ring: the ring to be checked + * @size: the size buffer we want to assure is available + * + * Returns 0 if stop is not needed + **/ +static inline int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size) +{ + if (likely(I40E_DESC_UNUSED(tx_ring) >= size)) + return 0; + return __i40e_maybe_stop_tx(tx_ring, size); +} + +/** + * i40e_chk_linearize - Check if there are more than 8 fragments per packet + * @skb: send buffer + * @count: number of buffers used + * + * Note: Our HW can't scatter-gather more than 8 fragments to build + * a packet on the wire and so we need to figure out the cases where we + * need to linearize the skb. + **/ +static inline bool i40e_chk_linearize(struct sk_buff *skb, int count) +{ + /* Both TSO and single send will work if count is less than 8 */ + if (likely(count < I40E_MAX_BUFFER_TXD)) + return false; + + if (skb_is_gso(skb)) + return __i40e_chk_linearize(skb); + + /* we can support up to 8 data buffers for a single send */ + return count != I40E_MAX_BUFFER_TXD; +} #endif /* _I40E_TXRX_H_ */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h index dd2da356d..3335f9d13 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_type.h +++ b/drivers/net/ethernet/intel/i40e/i40e_type.h @@ -78,7 +78,7 @@ enum i40e_debug_mask { I40E_DEBUG_DCB = 0x00000400, I40E_DEBUG_DIAG = 0x00000800, I40E_DEBUG_FD = 0x00001000, - + I40E_DEBUG_IWARP = 0x00F00000, I40E_DEBUG_AQ_MESSAGE = 0x01000000, I40E_DEBUG_AQ_DESCRIPTOR = 0x02000000, I40E_DEBUG_AQ_DESC_BUFFER = 0x04000000, @@ -90,6 +90,22 @@ enum i40e_debug_mask { I40E_DEBUG_ALL = 0xFFFFFFFF }; +#define I40E_MDIO_STCODE 0 +#define I40E_MDIO_OPCODE_ADDRESS 0 +#define I40E_MDIO_OPCODE_WRITE I40E_MASK(1, \ + I40E_GLGEN_MSCA_OPCODE_SHIFT) +#define I40E_MDIO_OPCODE_READ_INC_ADDR I40E_MASK(2, \ + I40E_GLGEN_MSCA_OPCODE_SHIFT) +#define I40E_MDIO_OPCODE_READ I40E_MASK(3, \ + I40E_GLGEN_MSCA_OPCODE_SHIFT) + +#define I40E_PHY_COM_REG_PAGE 0x1E +#define I40E_PHY_LED_LINK_MODE_MASK 0xF0 +#define I40E_PHY_LED_MANUAL_ON 0x100 +#define I40E_PHY_LED_PROV_REG_1 0xC430 +#define I40E_PHY_LED_MODE_MASK 0xFFFF +#define I40E_PHY_LED_MODE_ORIG 0x80000000 + /* These are structs for managing the hardware information and the operations. * The structures of function pointers are filled out at init time when we * know for sure exactly which hardware we're working with. This gives us the @@ -144,6 +160,7 @@ enum i40e_vsi_type { I40E_VSI_MIRROR = 5, I40E_VSI_SRIOV = 6, I40E_VSI_FDIR = 7, + I40E_VSI_IWARP = 8, I40E_VSI_TYPE_UNKNOWN }; @@ -1098,6 +1115,10 @@ enum i40e_filter_program_desc_pcmd { I40E_TXD_FLTR_QW1_CMD_SHIFT) #define I40E_TXD_FLTR_QW1_ATR_MASK BIT_ULL(I40E_TXD_FLTR_QW1_ATR_SHIFT) +#define I40E_TXD_FLTR_QW1_ATR_SHIFT (0xEULL + \ + I40E_TXD_FLTR_QW1_CMD_SHIFT) +#define I40E_TXD_FLTR_QW1_ATR_MASK BIT_ULL(I40E_TXD_FLTR_QW1_ATR_SHIFT) + #define I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT 20 #define I40E_TXD_FLTR_QW1_CNTINDEX_MASK (0x1FFUL << \ I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h index 3226946bf..ab866cf3d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h @@ -81,6 +81,9 @@ enum i40e_virtchnl_ops { I40E_VIRTCHNL_OP_GET_STATS = 15, I40E_VIRTCHNL_OP_FCOE = 16, I40E_VIRTCHNL_OP_EVENT = 17, + I40E_VIRTCHNL_OP_IWARP = 20, + I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP = 21, + I40E_VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP = 22, }; /* Virtual channel message descriptor. This overlays the admin queue @@ -348,6 +351,37 @@ struct i40e_virtchnl_pf_event { int severity; }; +/* I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP + * VF uses this message to request PF to map IWARP vectors to IWARP queues. + * The request for this originates from the VF IWARP driver through + * a client interface between VF LAN and VF IWARP driver. + * A vector could have an AEQ and CEQ attached to it although + * there is a single AEQ per VF IWARP instance in which case + * most vectors will have an INVALID_IDX for aeq and valid idx for ceq. + * There will never be a case where there will be multiple CEQs attached + * to a single vector. + * PF configures interrupt mapping and returns status. + */ + +/* HW does not define a type value for AEQ; only for RX/TX and CEQ. + * In order for us to keep the interface simple, SW will define a + * unique type value for AEQ. +*/ +#define I40E_QUEUE_TYPE_PE_AEQ 0x80 +#define I40E_QUEUE_INVALID_IDX 0xFFFF + +struct i40e_virtchnl_iwarp_qv_info { + u32 v_idx; /* msix_vector */ + u16 ceq_idx; + u16 aeq_idx; + u8 itr_idx; +}; + +struct i40e_virtchnl_iwarp_qvlist_info { + u32 num_vectors; + struct i40e_virtchnl_iwarp_qv_info qv_info[1]; +}; + /* VF reset states - these are written into the RSTAT register: * I40E_VFGEN_RSTAT1 on the PF * I40E_VFGEN_RSTAT on the VF diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 63e62f9ae..816c6bbf7 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2015 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -351,6 +351,136 @@ irq_list_done: i40e_flush(hw); } +/** + * i40e_release_iwarp_qvlist + * @vf: pointer to the VF. + * + **/ +static void i40e_release_iwarp_qvlist(struct i40e_vf *vf) +{ + struct i40e_pf *pf = vf->pf; + struct i40e_virtchnl_iwarp_qvlist_info *qvlist_info = vf->qvlist_info; + u32 msix_vf; + u32 i; + + if (!vf->qvlist_info) + return; + + msix_vf = pf->hw.func_caps.num_msix_vectors_vf; + for (i = 0; i < qvlist_info->num_vectors; i++) { + struct i40e_virtchnl_iwarp_qv_info *qv_info; + u32 next_q_index, next_q_type; + struct i40e_hw *hw = &pf->hw; + u32 v_idx, reg_idx, reg; + + qv_info = &qvlist_info->qv_info[i]; + if (!qv_info) + continue; + v_idx = qv_info->v_idx; + if (qv_info->ceq_idx != I40E_QUEUE_INVALID_IDX) { + /* Figure out the queue after CEQ and make that the + * first queue. + */ + reg_idx = (msix_vf - 1) * vf->vf_id + qv_info->ceq_idx; + reg = rd32(hw, I40E_VPINT_CEQCTL(reg_idx)); + next_q_index = (reg & I40E_VPINT_CEQCTL_NEXTQ_INDX_MASK) + >> I40E_VPINT_CEQCTL_NEXTQ_INDX_SHIFT; + next_q_type = (reg & I40E_VPINT_CEQCTL_NEXTQ_TYPE_MASK) + >> I40E_VPINT_CEQCTL_NEXTQ_TYPE_SHIFT; + + reg_idx = ((msix_vf - 1) * vf->vf_id) + (v_idx - 1); + reg = (next_q_index & + I40E_VPINT_LNKLSTN_FIRSTQ_INDX_MASK) | + (next_q_type << + I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_SHIFT); + + wr32(hw, I40E_VPINT_LNKLSTN(reg_idx), reg); + } + } + kfree(vf->qvlist_info); + vf->qvlist_info = NULL; +} + +/** + * i40e_config_iwarp_qvlist + * @vf: pointer to the VF info + * @qvlist_info: queue and vector list + * + * Return 0 on success or < 0 on error + **/ +static int i40e_config_iwarp_qvlist(struct i40e_vf *vf, + struct i40e_virtchnl_iwarp_qvlist_info *qvlist_info) +{ + struct i40e_pf *pf = vf->pf; + struct i40e_hw *hw = &pf->hw; + struct i40e_virtchnl_iwarp_qv_info *qv_info; + u32 v_idx, i, reg_idx, reg; + u32 next_q_idx, next_q_type; + u32 msix_vf, size; + + size = sizeof(struct i40e_virtchnl_iwarp_qvlist_info) + + (sizeof(struct i40e_virtchnl_iwarp_qv_info) * + (qvlist_info->num_vectors - 1)); + vf->qvlist_info = kzalloc(size, GFP_KERNEL); + vf->qvlist_info->num_vectors = qvlist_info->num_vectors; + + msix_vf = pf->hw.func_caps.num_msix_vectors_vf; + for (i = 0; i < qvlist_info->num_vectors; i++) { + qv_info = &qvlist_info->qv_info[i]; + if (!qv_info) + continue; + v_idx = qv_info->v_idx; + + /* Validate vector id belongs to this vf */ + if (!i40e_vc_isvalid_vector_id(vf, v_idx)) + goto err; + + vf->qvlist_info->qv_info[i] = *qv_info; + + reg_idx = ((msix_vf - 1) * vf->vf_id) + (v_idx - 1); + /* We might be sharing the interrupt, so get the first queue + * index and type, push it down the list by adding the new + * queue on top. Also link it with the new queue in CEQCTL. + */ + reg = rd32(hw, I40E_VPINT_LNKLSTN(reg_idx)); + next_q_idx = ((reg & I40E_VPINT_LNKLSTN_FIRSTQ_INDX_MASK) >> + I40E_VPINT_LNKLSTN_FIRSTQ_INDX_SHIFT); + next_q_type = ((reg & I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_MASK) >> + I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_SHIFT); + + if (qv_info->ceq_idx != I40E_QUEUE_INVALID_IDX) { + reg_idx = (msix_vf - 1) * vf->vf_id + qv_info->ceq_idx; + reg = (I40E_VPINT_CEQCTL_CAUSE_ENA_MASK | + (v_idx << I40E_VPINT_CEQCTL_MSIX_INDX_SHIFT) | + (qv_info->itr_idx << I40E_VPINT_CEQCTL_ITR_INDX_SHIFT) | + (next_q_type << I40E_VPINT_CEQCTL_NEXTQ_TYPE_SHIFT) | + (next_q_idx << I40E_VPINT_CEQCTL_NEXTQ_INDX_SHIFT)); + wr32(hw, I40E_VPINT_CEQCTL(reg_idx), reg); + + reg_idx = ((msix_vf - 1) * vf->vf_id) + (v_idx - 1); + reg = (qv_info->ceq_idx & + I40E_VPINT_LNKLSTN_FIRSTQ_INDX_MASK) | + (I40E_QUEUE_TYPE_PE_CEQ << + I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_SHIFT); + wr32(hw, I40E_VPINT_LNKLSTN(reg_idx), reg); + } + + if (qv_info->aeq_idx != I40E_QUEUE_INVALID_IDX) { + reg = (I40E_VPINT_AEQCTL_CAUSE_ENA_MASK | + (v_idx << I40E_VPINT_AEQCTL_MSIX_INDX_SHIFT) | + (qv_info->itr_idx << I40E_VPINT_AEQCTL_ITR_INDX_SHIFT)); + + wr32(hw, I40E_VPINT_AEQCTL(vf->vf_id), reg); + } + } + + return 0; +err: + kfree(vf->qvlist_info); + vf->qvlist_info = NULL; + return -EINVAL; +} + /** * i40e_config_vsi_tx_queue * @vf: pointer to the VF info @@ -461,7 +591,7 @@ static int i40e_config_vsi_rx_queue(struct i40e_vf *vf, u16 vsi_id, rx_ctx.hbuff = info->hdr_size >> I40E_RXQ_CTX_HBUFF_SHIFT; /* set splitalways mode 10b */ - rx_ctx.dtype = 0x2; + rx_ctx.dtype = I40E_RX_DTYPE_HEADER_SPLIT; } /* databuffer length validation */ @@ -602,8 +732,8 @@ static void i40e_enable_vf_mappings(struct i40e_vf *vf) * that VF queues be mapped using this method, even when they are * contiguous in real life */ - wr32(hw, I40E_VSILAN_QBASE(vf->lan_vsi_id), - I40E_VSILAN_QBASE_VSIQTABLE_ENA_MASK); + i40e_write_rx_ctl(hw, I40E_VSILAN_QBASE(vf->lan_vsi_id), + I40E_VSILAN_QBASE_VSIQTABLE_ENA_MASK); /* enable VF vplan_qtable mappings */ reg = I40E_VPLAN_MAPENA_TXRX_ENA_MASK; @@ -630,7 +760,8 @@ static void i40e_enable_vf_mappings(struct i40e_vf *vf) (j * 2) + 1); reg |= qid << 16; } - wr32(hw, I40E_VSILAN_QTABLE(j, vf->lan_vsi_id), reg); + i40e_write_rx_ctl(hw, I40E_VSILAN_QTABLE(j, vf->lan_vsi_id), + reg); } i40e_flush(hw); @@ -849,9 +980,11 @@ complete_reset: /* reallocate VF resources to reset the VSI state */ i40e_free_vf_res(vf); if (!i40e_alloc_vf_res(vf)) { + int abs_vf_id = vf->vf_id + hw->func_caps.vf_base_id; i40e_enable_vf_mappings(vf); set_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states); clear_bit(I40E_VF_STAT_DISABLED, &vf->vf_states); + i40e_notify_client_of_vf_reset(pf, abs_vf_id); } /* tell the VF the reset is done */ wr32(hw, I40E_VFGEN_RSTAT1(vf->vf_id), I40E_VFR_VFACTIVE); @@ -876,11 +1009,7 @@ void i40e_free_vfs(struct i40e_pf *pf) while (test_and_set_bit(__I40E_VF_DISABLE, &pf->state)) usleep_range(1000, 2000); - for (i = 0; i < pf->num_alloc_vfs; i++) - if (test_bit(I40E_VF_STAT_INIT, &pf->vf[i].vf_states)) - i40e_vsi_control_rings(pf->vsi[pf->vf[i].lan_vsi_idx], - false); - + i40e_notify_client_of_vf_enable(pf, 0); for (i = 0; i < pf->num_alloc_vfs; i++) if (test_bit(I40E_VF_STAT_INIT, &pf->vf[i].vf_states)) i40e_vsi_control_rings(pf->vsi[pf->vf[i].lan_vsi_idx], @@ -952,6 +1081,7 @@ int i40e_alloc_vfs(struct i40e_pf *pf, u16 num_alloc_vfs) goto err_iov; } } + i40e_notify_client_of_vf_enable(pf, num_alloc_vfs); /* allocate memory */ vfs = kcalloc(num_alloc_vfs, sizeof(struct i40e_vf), GFP_KERNEL); if (!vfs) { @@ -980,7 +1110,7 @@ err_alloc: i40e_free_vfs(pf); err_iov: /* Re-enable interrupt 0. */ - i40e_irq_dynamic_enable_icr0(pf); + i40e_irq_dynamic_enable_icr0(pf, false); return ret; } @@ -1205,6 +1335,13 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg) vsi = pf->vsi[vf->lan_vsi_idx]; if (!vsi->info.pvid) vfres->vf_offload_flags |= I40E_VIRTCHNL_VF_OFFLOAD_VLAN; + + if (i40e_vf_client_capable(pf, vf->vf_id, I40E_CLIENT_IWARP) && + (vf->driver_caps & I40E_VIRTCHNL_VF_OFFLOAD_IWARP)) { + vfres->vf_offload_flags |= I40E_VIRTCHNL_VF_OFFLOAD_IWARP; + set_bit(I40E_VF_STAT_IWARPENA, &vf->vf_states); + } + if (pf->flags & I40E_FLAG_RSS_AQ_CAPABLE) { if (vf->driver_caps & I40E_VIRTCHNL_VF_OFFLOAD_RSS_AQ) vfres->vf_offload_flags |= @@ -1213,9 +1350,21 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg) vfres->vf_offload_flags |= I40E_VIRTCHNL_VF_OFFLOAD_RSS_REG; } + if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE) { + if (vf->driver_caps & I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2) + vfres->vf_offload_flags |= + I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2; + } + if (vf->driver_caps & I40E_VIRTCHNL_VF_OFFLOAD_RX_POLLING) vfres->vf_offload_flags |= I40E_VIRTCHNL_VF_OFFLOAD_RX_POLLING; + if (pf->flags & I40E_FLAG_WB_ON_ITR_CAPABLE) { + if (vf->driver_caps & I40E_VIRTCHNL_VF_OFFLOAD_WB_ON_ITR) + vfres->vf_offload_flags |= + I40E_VIRTCHNL_VF_OFFLOAD_WB_ON_ITR; + } + vfres->num_vsis = num_vsis; vfres->num_queue_pairs = vf->num_queue_pairs; vfres->max_vectors = pf->hw.func_caps.num_msix_vectors_vf; @@ -1813,6 +1962,72 @@ error_param: return i40e_vc_send_resp_to_vf(vf, I40E_VIRTCHNL_OP_DEL_VLAN, aq_ret); } +/** + * i40e_vc_iwarp_msg + * @vf: pointer to the VF info + * @msg: pointer to the msg buffer + * @msglen: msg length + * + * called from the VF for the iwarp msgs + **/ +static int i40e_vc_iwarp_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) +{ + struct i40e_pf *pf = vf->pf; + int abs_vf_id = vf->vf_id + pf->hw.func_caps.vf_base_id; + i40e_status aq_ret = 0; + + if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states) || + !test_bit(I40E_VF_STAT_IWARPENA, &vf->vf_states)) { + aq_ret = I40E_ERR_PARAM; + goto error_param; + } + + i40e_notify_client_of_vf_msg(pf->vsi[pf->lan_vsi], abs_vf_id, + msg, msglen); + +error_param: + /* send the response to the VF */ + return i40e_vc_send_resp_to_vf(vf, I40E_VIRTCHNL_OP_IWARP, + aq_ret); +} + +/** + * i40e_vc_iwarp_qvmap_msg + * @vf: pointer to the VF info + * @msg: pointer to the msg buffer + * @msglen: msg length + * @config: config qvmap or release it + * + * called from the VF for the iwarp msgs + **/ +static int i40e_vc_iwarp_qvmap_msg(struct i40e_vf *vf, u8 *msg, u16 msglen, + bool config) +{ + struct i40e_virtchnl_iwarp_qvlist_info *qvlist_info = + (struct i40e_virtchnl_iwarp_qvlist_info *)msg; + i40e_status aq_ret = 0; + + if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states) || + !test_bit(I40E_VF_STAT_IWARPENA, &vf->vf_states)) { + aq_ret = I40E_ERR_PARAM; + goto error_param; + } + + if (config) { + if (i40e_config_iwarp_qvlist(vf, qvlist_info)) + aq_ret = I40E_ERR_PARAM; + } else { + i40e_release_iwarp_qvlist(vf); + } + +error_param: + /* send the response to the VF */ + return i40e_vc_send_resp_to_vf(vf, + config ? I40E_VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP : + I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP, + aq_ret); +} + /** * i40e_vc_validate_vf_msg * @vf: pointer to the VF info @@ -1908,6 +2123,32 @@ static int i40e_vc_validate_vf_msg(struct i40e_vf *vf, u32 v_opcode, case I40E_VIRTCHNL_OP_GET_STATS: valid_len = sizeof(struct i40e_virtchnl_queue_select); break; + case I40E_VIRTCHNL_OP_IWARP: + /* These messages are opaque to us and will be validated in + * the RDMA client code. We just need to check for nonzero + * length. The firmware will enforce max length restrictions. + */ + if (msglen) + valid_len = msglen; + else + err_msg_format = true; + break; + case I40E_VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP: + valid_len = 0; + break; + case I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP: + valid_len = sizeof(struct i40e_virtchnl_iwarp_qvlist_info); + if (msglen >= valid_len) { + struct i40e_virtchnl_iwarp_qvlist_info *qv = + (struct i40e_virtchnl_iwarp_qvlist_info *)msg; + if (qv->num_vectors == 0) { + err_msg_format = true; + break; + } + valid_len += ((qv->num_vectors - 1) * + sizeof(struct i40e_virtchnl_iwarp_qv_info)); + } + break; /* These are always errors coming from the VF. */ case I40E_VIRTCHNL_OP_EVENT: case I40E_VIRTCHNL_OP_UNKNOWN: @@ -1997,6 +2238,15 @@ int i40e_vc_process_vf_msg(struct i40e_pf *pf, u16 vf_id, u32 v_opcode, case I40E_VIRTCHNL_OP_GET_STATS: ret = i40e_vc_get_stats_msg(vf, msg, msglen); break; + case I40E_VIRTCHNL_OP_IWARP: + ret = i40e_vc_iwarp_msg(vf, msg, msglen); + break; + case I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP: + ret = i40e_vc_iwarp_qvmap_msg(vf, msg, msglen, true); + break; + case I40E_VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP: + ret = i40e_vc_iwarp_qvmap_msg(vf, msg, msglen, false); + break; case I40E_VIRTCHNL_OP_UNKNOWN: default: dev_err(&pf->pdev->dev, "Unsupported opcode %d from VF %d\n", @@ -2025,7 +2275,11 @@ int i40e_vc_process_vflr_event(struct i40e_pf *pf) if (!test_bit(__I40E_VFLR_EVENT_PENDING, &pf->state)) return 0; - /* re-enable vflr interrupt cause */ + /* Re-enable the VFLR interrupt cause here, before looking for which + * VF got reset. Otherwise, if another VF gets a reset while the + * first one is being processed, that interrupt will be lost, and + * that VF will be stuck in reset forever. + */ reg = rd32(hw, I40E_PFINT_ICR0_ENA); reg |= I40E_PFINT_ICR0_ENA_VFLR_MASK; wr32(hw, I40E_PFINT_ICR0_ENA, reg); @@ -2186,6 +2440,8 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, * and then reloading the VF driver. */ i40e_vc_disable_vf(pf, vf); + /* During reset the VF got a new VSI, so refresh the pointer. */ + vsi = pf->vsi[vf->lan_vsi_idx]; } /* Check for condition where there was already a port VLAN ID @@ -2294,6 +2550,9 @@ int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate, case I40E_LINK_SPEED_40GB: speed = 40000; break; + case I40E_LINK_SPEED_20GB: + speed = 20000; + break; case I40E_LINK_SPEED_10GB: speed = 10000; break; diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h index da44995de..e7b2fba03 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h @@ -58,6 +58,7 @@ enum i40e_queue_ctrl { enum i40e_vf_states { I40E_VF_STAT_INIT = 0, I40E_VF_STAT_ACTIVE, + I40E_VF_STAT_IWARPENA, I40E_VF_STAT_FCOEENA, I40E_VF_STAT_DISABLED, }; @@ -66,6 +67,7 @@ enum i40e_vf_states { enum i40e_vf_capabilities { I40E_VIRTCHNL_VF_CAP_PRIVILEGE = 0, I40E_VIRTCHNL_VF_CAP_L2, + I40E_VIRTCHNL_VF_CAP_IWARP, }; /* VF information structure */ @@ -91,8 +93,8 @@ struct i40e_vf { * When assigned, these will be non-zero, because VSI 0 is always * the main LAN VSI for the PF. */ - u8 lan_vsi_idx; /* index into PF struct */ - u8 lan_vsi_id; /* ID as used by firmware */ + u16 lan_vsi_idx; /* index into PF struct */ + u16 lan_vsi_id; /* ID as used by firmware */ u8 num_queue_pairs; /* num of qps assigned to VF vsis */ u64 num_mdd_events; /* num of mdd events detected */ @@ -106,6 +108,8 @@ struct i40e_vf { bool link_forced; bool link_up; /* only valid if VF link is forced */ bool spoofchk; + /* RDMA Client */ + struct i40e_virtchnl_iwarp_qvlist_info *qvlist_info; }; void i40e_free_vfs(struct i40e_pf *pf); diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq.c b/drivers/net/ethernet/intel/i40evf/i40e_adminq.c index 3f65e39b3..44f7ed758 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_adminq.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq.c @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver - * Copyright(c) 2013 - 2014 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -887,6 +887,9 @@ i40e_status i40evf_clean_arq_element(struct i40e_hw *hw, u16 flags; u16 ntu; + /* pre-clean the event info */ + memset(&e->desc, 0, sizeof(e->desc)); + /* take the lock before we start messing with the ring */ mutex_lock(&hw->aq.arq_mutex); diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h index f5b2b369d..aad8d6277 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver - * Copyright(c) 2013 - 2014 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -34,7 +34,7 @@ */ #define I40E_FW_API_VERSION_MAJOR 0x0001 -#define I40E_FW_API_VERSION_MINOR 0x0004 +#define I40E_FW_API_VERSION_MINOR 0x0005 struct i40e_aq_desc { __le16 flags; @@ -145,6 +145,9 @@ enum i40e_admin_queue_opc { i40e_aqc_opc_remove_statistics = 0x0202, i40e_aqc_opc_set_port_parameters = 0x0203, i40e_aqc_opc_get_switch_resource_alloc = 0x0204, + i40e_aqc_opc_set_switch_config = 0x0205, + i40e_aqc_opc_rx_ctl_reg_read = 0x0206, + i40e_aqc_opc_rx_ctl_reg_write = 0x0207, i40e_aqc_opc_add_vsi = 0x0210, i40e_aqc_opc_update_vsi_parameters = 0x0211, @@ -220,6 +223,7 @@ enum i40e_admin_queue_opc { i40e_aqc_opc_get_phy_wol_caps = 0x0621, i40e_aqc_opc_set_phy_debug = 0x0622, i40e_aqc_opc_upload_ext_phy_fm = 0x0625, + i40e_aqc_opc_run_phy_activity = 0x0626, /* NVM commands */ i40e_aqc_opc_nvm_read = 0x0701, @@ -228,6 +232,7 @@ enum i40e_admin_queue_opc { i40e_aqc_opc_nvm_config_read = 0x0704, i40e_aqc_opc_nvm_config_write = 0x0705, i40e_aqc_opc_oem_post_update = 0x0720, + i40e_aqc_opc_thermal_sensor = 0x0721, /* virtualization commands */ i40e_aqc_opc_send_msg_to_pf = 0x0801, @@ -399,6 +404,7 @@ struct i40e_aqc_list_capabilities_element_resp { #define I40E_AQ_CAP_ID_OS2BMC_CAP 0x0004 #define I40E_AQ_CAP_ID_FUNCTIONS_VALID 0x0005 #define I40E_AQ_CAP_ID_ALTERNATE_RAM 0x0006 +#define I40E_AQ_CAP_ID_WOL_AND_PROXY 0x0008 #define I40E_AQ_CAP_ID_SRIOV 0x0012 #define I40E_AQ_CAP_ID_VF 0x0013 #define I40E_AQ_CAP_ID_VMDQ 0x0014 @@ -419,6 +425,7 @@ struct i40e_aqc_list_capabilities_element_resp { #define I40E_AQ_CAP_ID_LED 0x0061 #define I40E_AQ_CAP_ID_SDP 0x0062 #define I40E_AQ_CAP_ID_MDIO 0x0063 +#define I40E_AQ_CAP_ID_WSR_PROT 0x0064 #define I40E_AQ_CAP_ID_FLEX10 0x00F1 #define I40E_AQ_CAP_ID_CEM 0x00F2 @@ -677,6 +684,31 @@ struct i40e_aqc_switch_resource_alloc_element_resp { I40E_CHECK_STRUCT_LEN(0x10, i40e_aqc_switch_resource_alloc_element_resp); +/* Set Switch Configuration (direct 0x0205) */ +struct i40e_aqc_set_switch_config { + __le16 flags; +#define I40E_AQ_SET_SWITCH_CFG_PROMISC 0x0001 +#define I40E_AQ_SET_SWITCH_CFG_L2_FILTER 0x0002 + __le16 valid_flags; + u8 reserved[12]; +}; + +I40E_CHECK_CMD_LENGTH(i40e_aqc_set_switch_config); + +/* Read Receive control registers (direct 0x0206) + * Write Receive control registers (direct 0x0207) + * used for accessing Rx control registers that can be + * slow and need special handling when under high Rx load + */ +struct i40e_aqc_rx_ctl_reg_read_write { + __le32 reserved1; + __le32 address; + __le32 reserved2; + __le32 value; +}; + +I40E_CHECK_CMD_LENGTH(i40e_aqc_rx_ctl_reg_read_write); + /* Add VSI (indirect 0x0210) * this indirect command uses struct i40e_aqc_vsi_properties_data * as the indirect buffer (128 bytes) @@ -903,7 +935,8 @@ struct i40e_aqc_add_veb { I40E_AQC_ADD_VEB_PORT_TYPE_SHIFT) #define I40E_AQC_ADD_VEB_PORT_TYPE_DEFAULT 0x2 #define I40E_AQC_ADD_VEB_PORT_TYPE_DATA 0x4 -#define I40E_AQC_ADD_VEB_ENABLE_L2_FILTER 0x8 +#define I40E_AQC_ADD_VEB_ENABLE_L2_FILTER 0x8 /* deprecated */ +#define I40E_AQC_ADD_VEB_ENABLE_DISABLE_STATS 0x10 u8 enable_tcs; u8 reserved[9]; }; @@ -970,6 +1003,7 @@ struct i40e_aqc_add_macvlan_element_data { #define I40E_AQC_MACVLAN_ADD_HASH_MATCH 0x0002 #define I40E_AQC_MACVLAN_ADD_IGNORE_VLAN 0x0004 #define I40E_AQC_MACVLAN_ADD_TO_QUEUE 0x0008 +#define I40E_AQC_MACVLAN_ADD_USE_SHARED_MAC 0x0010 __le16 queue_number; #define I40E_AQC_MACVLAN_CMD_QUEUE_SHIFT 0 #define I40E_AQC_MACVLAN_CMD_QUEUE_MASK (0x7FF << \ @@ -1066,6 +1100,7 @@ struct i40e_aqc_set_vsi_promiscuous_modes { #define I40E_AQC_SET_VSI_PROMISC_BROADCAST 0x04 #define I40E_AQC_SET_VSI_DEFAULT 0x08 #define I40E_AQC_SET_VSI_PROMISC_VLAN 0x10 +#define I40E_AQC_SET_VSI_PROMISC_TX 0x8000 __le16 seid; #define I40E_AQC_VSI_PROM_CMD_SEID_MASK 0x3FF __le16 vlan_tag; @@ -1254,10 +1289,16 @@ struct i40e_aqc_add_remove_cloud_filters_element_data { #define I40E_AQC_ADD_CLOUD_TNL_TYPE_SHIFT 9 #define I40E_AQC_ADD_CLOUD_TNL_TYPE_MASK 0x1E00 -#define I40E_AQC_ADD_CLOUD_TNL_TYPE_XVLAN 0 +#define I40E_AQC_ADD_CLOUD_TNL_TYPE_VXLAN 0 #define I40E_AQC_ADD_CLOUD_TNL_TYPE_NVGRE_OMAC 1 -#define I40E_AQC_ADD_CLOUD_TNL_TYPE_NGE 2 +#define I40E_AQC_ADD_CLOUD_TNL_TYPE_GENEVE 2 #define I40E_AQC_ADD_CLOUD_TNL_TYPE_IP 3 +#define I40E_AQC_ADD_CLOUD_TNL_TYPE_RESERVED 4 +#define I40E_AQC_ADD_CLOUD_TNL_TYPE_VXLAN_GPE 5 + +#define I40E_AQC_ADD_CLOUD_FLAGS_SHARED_OUTER_MAC 0x2000 +#define I40E_AQC_ADD_CLOUD_FLAGS_SHARED_INNER_MAC 0x4000 +#define I40E_AQC_ADD_CLOUD_FLAGS_SHARED_OUTER_IP 0x8000 __le32 tenant_id; u8 reserved[4]; @@ -1752,7 +1793,12 @@ struct i40e_aqc_get_link_status { u8 config; #define I40E_AQ_CONFIG_CRC_ENA 0x04 #define I40E_AQ_CONFIG_PACING_MASK 0x78 - u8 reserved[5]; + u8 external_power_ability; +#define I40E_AQ_LINK_POWER_CLASS_1 0x00 +#define I40E_AQ_LINK_POWER_CLASS_2 0x01 +#define I40E_AQ_LINK_POWER_CLASS_3 0x02 +#define I40E_AQ_LINK_POWER_CLASS_4 0x03 + u8 reserved[4]; }; I40E_CHECK_CMD_LENGTH(i40e_aqc_get_link_status); @@ -1820,6 +1866,18 @@ enum i40e_aq_phy_reg_type { I40E_AQC_PHY_REG_EXERNAL_MODULE = 0x3 }; +/* Run PHY Activity (0x0626) */ +struct i40e_aqc_run_phy_activity { + __le16 activity_id; + u8 flags; + u8 reserved1; + __le32 control; + __le32 data; + u8 reserved2[4]; +}; + +I40E_CHECK_CMD_LENGTH(i40e_aqc_run_phy_activity); + /* NVM Read command (indirect 0x0701) * NVM Erase commands (direct 0x0702) * NVM Update commands (indirect 0x0703) @@ -1909,6 +1967,22 @@ struct i40e_aqc_nvm_oem_post_update_buffer { I40E_CHECK_STRUCT_LEN(0x28, i40e_aqc_nvm_oem_post_update_buffer); +/* Thermal Sensor (indirect 0x0721) + * read or set thermal sensor configs and values + * takes a sensor and command specific data buffer, not detailed here + */ +struct i40e_aqc_thermal_sensor { + u8 sensor_action; +#define I40E_AQ_THERMAL_SENSOR_READ_CONFIG 0 +#define I40E_AQ_THERMAL_SENSOR_SET_CONFIG 1 +#define I40E_AQ_THERMAL_SENSOR_READ_TEMP 2 + u8 reserved[7]; + __le32 addr_high; + __le32 addr_low; +}; + +I40E_CHECK_CMD_LENGTH(i40e_aqc_thermal_sensor); + /* Send to PF command (indirect 0x0801) id is only used by PF * Send to VF command (indirect 0x0802) id is only used by PF * Send to Peer PF command (indirect 0x0803) @@ -2083,6 +2157,7 @@ struct i40e_aqc_add_udp_tunnel { #define I40E_AQC_TUNNEL_TYPE_VXLAN 0x00 #define I40E_AQC_TUNNEL_TYPE_NGE 0x01 #define I40E_AQC_TUNNEL_TYPE_TEREDO 0x10 +#define I40E_AQC_TUNNEL_TYPE_VXLAN_GPE 0x11 u8 reserved1[10]; }; diff --git a/drivers/net/ethernet/intel/i40evf/i40e_common.c b/drivers/net/ethernet/intel/i40evf/i40e_common.c index 938783e0b..771ac6ad8 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_common.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_common.c @@ -903,6 +903,131 @@ struct i40e_rx_ptype_decoded i40evf_ptype_lookup[] = { I40E_PTT_UNUSED_ENTRY(255) }; +/** + * i40evf_aq_rx_ctl_read_register - use FW to read from an Rx control register + * @hw: pointer to the hw struct + * @reg_addr: register address + * @reg_val: ptr to register value + * @cmd_details: pointer to command details structure or NULL + * + * Use the firmware to read the Rx control register, + * especially useful if the Rx unit is under heavy pressure + **/ +i40e_status i40evf_aq_rx_ctl_read_register(struct i40e_hw *hw, + u32 reg_addr, u32 *reg_val, + struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_rx_ctl_reg_read_write *cmd_resp = + (struct i40e_aqc_rx_ctl_reg_read_write *)&desc.params.raw; + i40e_status status; + + if (!reg_val) + return I40E_ERR_PARAM; + + i40evf_fill_default_direct_cmd_desc(&desc, + i40e_aqc_opc_rx_ctl_reg_read); + + cmd_resp->address = cpu_to_le32(reg_addr); + + status = i40evf_asq_send_command(hw, &desc, NULL, 0, cmd_details); + + if (status == 0) + *reg_val = le32_to_cpu(cmd_resp->value); + + return status; +} + +/** + * i40evf_read_rx_ctl - read from an Rx control register + * @hw: pointer to the hw struct + * @reg_addr: register address + **/ +u32 i40evf_read_rx_ctl(struct i40e_hw *hw, u32 reg_addr) +{ + i40e_status status = 0; + bool use_register; + int retry = 5; + u32 val = 0; + + use_register = (hw->aq.api_maj_ver == 1) && (hw->aq.api_min_ver < 5); + if (!use_register) { +do_retry: + status = i40evf_aq_rx_ctl_read_register(hw, reg_addr, + &val, NULL); + if (hw->aq.asq_last_status == I40E_AQ_RC_EAGAIN && retry) { + usleep_range(1000, 2000); + retry--; + goto do_retry; + } + } + + /* if the AQ access failed, try the old-fashioned way */ + if (status || use_register) + val = rd32(hw, reg_addr); + + return val; +} + +/** + * i40evf_aq_rx_ctl_write_register + * @hw: pointer to the hw struct + * @reg_addr: register address + * @reg_val: register value + * @cmd_details: pointer to command details structure or NULL + * + * Use the firmware to write to an Rx control register, + * especially useful if the Rx unit is under heavy pressure + **/ +i40e_status i40evf_aq_rx_ctl_write_register(struct i40e_hw *hw, + u32 reg_addr, u32 reg_val, + struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_rx_ctl_reg_read_write *cmd = + (struct i40e_aqc_rx_ctl_reg_read_write *)&desc.params.raw; + i40e_status status; + + i40evf_fill_default_direct_cmd_desc(&desc, + i40e_aqc_opc_rx_ctl_reg_write); + + cmd->address = cpu_to_le32(reg_addr); + cmd->value = cpu_to_le32(reg_val); + + status = i40evf_asq_send_command(hw, &desc, NULL, 0, cmd_details); + + return status; +} + +/** + * i40evf_write_rx_ctl - write to an Rx control register + * @hw: pointer to the hw struct + * @reg_addr: register address + * @reg_val: register value + **/ +void i40evf_write_rx_ctl(struct i40e_hw *hw, u32 reg_addr, u32 reg_val) +{ + i40e_status status = 0; + bool use_register; + int retry = 5; + + use_register = (hw->aq.api_maj_ver == 1) && (hw->aq.api_min_ver < 5); + if (!use_register) { +do_retry: + status = i40evf_aq_rx_ctl_write_register(hw, reg_addr, + reg_val, NULL); + if (hw->aq.asq_last_status == I40E_AQ_RC_EAGAIN && retry) { + usleep_range(1000, 2000); + retry--; + goto do_retry; + } + } + + /* if the AQ access failed, try the old-fashioned way */ + if (status || use_register) + wr32(hw, reg_addr, reg_val); +} + /** * i40e_aq_send_msg_to_pf * @hw: pointer to the hardware structure diff --git a/drivers/net/ethernet/intel/i40evf/i40e_prototype.h b/drivers/net/ethernet/intel/i40evf/i40e_prototype.h index cbd9a1b07..d89d52109 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_prototype.h @@ -103,4 +103,19 @@ i40e_status i40e_aq_add_rem_control_packet_filter(struct i40e_hw *hw, struct i40e_asq_cmd_details *cmd_details); void i40e_add_filter_to_drop_tx_flow_control_frames(struct i40e_hw *hw, u16 vsi_seid); +i40e_status i40evf_aq_rx_ctl_read_register(struct i40e_hw *hw, + u32 reg_addr, u32 *reg_val, + struct i40e_asq_cmd_details *cmd_details); +u32 i40evf_read_rx_ctl(struct i40e_hw *hw, u32 reg_addr); +i40e_status i40evf_aq_rx_ctl_write_register(struct i40e_hw *hw, + u32 reg_addr, u32 reg_val, + struct i40e_asq_cmd_details *cmd_details); +void i40evf_write_rx_ctl(struct i40e_hw *hw, u32 reg_addr, u32 reg_val); +i40e_status i40e_read_phy_register(struct i40e_hw *hw, u8 page, + u16 reg, u8 phy_addr, u16 *value); +i40e_status i40e_write_phy_register(struct i40e_hw *hw, u8 page, + u16 reg, u8 phy_addr, u16 value); +u8 i40e_get_phy_address(struct i40e_hw *hw, u8 dev_num); +i40e_status i40e_blink_phy_link_led(struct i40e_hw *hw, + u32 time, u32 interval); #endif /* _I40E_PROTOTYPE_H_ */ diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index 7a00657da..cea97daa8 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver - * Copyright(c) 2013 - 2014 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -129,15 +129,19 @@ void i40evf_free_tx_resources(struct i40e_ring *tx_ring) /** * i40evf_get_tx_pending - how many Tx descriptors not processed * @tx_ring: the ring of descriptors + * @in_sw: is tx_pending being checked in SW or HW * * Since there is no access to the ring head register * in XL710, we need to use our local copies **/ -u32 i40evf_get_tx_pending(struct i40e_ring *ring) +u32 i40evf_get_tx_pending(struct i40e_ring *ring, bool in_sw) { u32 head, tail; - head = i40e_get_head(ring); + if (!in_sw) + head = i40e_get_head(ring); + else + head = ring->next_to_clean; tail = readl(ring->tail); if (head != tail) @@ -252,6 +256,22 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget) tx_ring->q_vector->tx.total_bytes += total_bytes; tx_ring->q_vector->tx.total_packets += total_packets; + if (tx_ring->flags & I40E_TXR_FLAGS_WB_ON_ITR) { + unsigned int j = 0; + /* check to see if there are < 4 descriptors + * waiting to be written back, then kick the hardware to force + * them to be written back in case we stay in NAPI. + * In this mode on X722 we do not enable Interrupt. + */ + j = i40evf_get_tx_pending(tx_ring, false); + + if (budget && + ((j / (WB_STRIDE + 1)) == 0) && (j > 0) && + !test_bit(__I40E_DOWN, &tx_ring->vsi->state) && + (I40E_DESC_UNUSED(tx_ring) != tx_ring->count)) + tx_ring->arm_wb = true; + } + netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev, tx_ring->queue_index), total_packets, total_bytes); @@ -276,39 +296,49 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget) } /** - * i40evf_force_wb -Arm hardware to do a wb on noncache aligned descriptors + * i40evf_enable_wb_on_itr - Arm hardware to do a wb, interrupts are not enabled * @vsi: the VSI we care about - * @q_vector: the vector on which to force writeback + * @q_vector: the vector on which to enable writeback * **/ -static void i40evf_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector) +static void i40e_enable_wb_on_itr(struct i40e_vsi *vsi, + struct i40e_q_vector *q_vector) { u16 flags = q_vector->tx.ring[0].flags; + u32 val; - if (flags & I40E_TXR_FLAGS_WB_ON_ITR) { - u32 val; + if (!(flags & I40E_TXR_FLAGS_WB_ON_ITR)) + return; - if (q_vector->arm_wb_state) - return; + if (q_vector->arm_wb_state) + return; - val = I40E_VFINT_DYN_CTLN1_WB_ON_ITR_MASK; + val = I40E_VFINT_DYN_CTLN1_WB_ON_ITR_MASK | + I40E_VFINT_DYN_CTLN1_ITR_INDX_MASK; /* set noitr */ - wr32(&vsi->back->hw, - I40E_VFINT_DYN_CTLN1(q_vector->v_idx + - vsi->base_vector - 1), - val); - q_vector->arm_wb_state = true; - } else { - u32 val = I40E_VFINT_DYN_CTLN1_INTENA_MASK | - I40E_VFINT_DYN_CTLN1_ITR_INDX_MASK | /* set noitr */ - I40E_VFINT_DYN_CTLN1_SWINT_TRIG_MASK | - I40E_VFINT_DYN_CTLN1_SW_ITR_INDX_ENA_MASK; - /* allow 00 to be written to the index */ - - wr32(&vsi->back->hw, - I40E_VFINT_DYN_CTLN1(q_vector->v_idx + - vsi->base_vector - 1), val); - } + wr32(&vsi->back->hw, + I40E_VFINT_DYN_CTLN1(q_vector->v_idx + + vsi->base_vector - 1), val); + q_vector->arm_wb_state = true; +} + +/** + * i40evf_force_wb - Issue SW Interrupt so HW does a wb + * @vsi: the VSI we care about + * @q_vector: the vector on which to force writeback + * + **/ +void i40evf_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector) +{ + u32 val = I40E_VFINT_DYN_CTLN1_INTENA_MASK | + I40E_VFINT_DYN_CTLN1_ITR_INDX_MASK | /* set noitr */ + I40E_VFINT_DYN_CTLN1_SWINT_TRIG_MASK | + I40E_VFINT_DYN_CTLN1_SW_ITR_INDX_ENA_MASK + /* allow 00 to be written to the index */; + + wr32(&vsi->back->hw, + I40E_VFINT_DYN_CTLN1(q_vector->v_idx + vsi->base_vector - 1), + val); } /** @@ -506,7 +536,7 @@ void i40evf_clean_rx_ring(struct i40e_ring *rx_ring) if (rx_bi->page_dma) { dma_unmap_page(dev, rx_bi->page_dma, - PAGE_SIZE / 2, + PAGE_SIZE, DMA_FROM_DEVICE); rx_bi->page_dma = 0; } @@ -641,16 +671,19 @@ static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val) * i40evf_alloc_rx_buffers_ps - Replace used receive buffers; packet split * @rx_ring: ring to place buffers on * @cleaned_count: number of buffers to replace + * + * Returns true if any errors on allocation **/ -void i40evf_alloc_rx_buffers_ps(struct i40e_ring *rx_ring, u16 cleaned_count) +bool i40evf_alloc_rx_buffers_ps(struct i40e_ring *rx_ring, u16 cleaned_count) { u16 i = rx_ring->next_to_use; union i40e_rx_desc *rx_desc; struct i40e_rx_buffer *bi; + const int current_node = numa_node_id(); /* do nothing if no valid netdev defined */ if (!rx_ring->netdev || !cleaned_count) - return; + return false; while (cleaned_count--) { rx_desc = I40E_RX_DESC(rx_ring, i); @@ -658,56 +691,79 @@ void i40evf_alloc_rx_buffers_ps(struct i40e_ring *rx_ring, u16 cleaned_count) if (bi->skb) /* desc is in use */ goto no_buffers; + + /* If we've been moved to a different NUMA node, release the + * page so we can get a new one on the current node. + */ + if (bi->page && page_to_nid(bi->page) != current_node) { + dma_unmap_page(rx_ring->dev, + bi->page_dma, + PAGE_SIZE, + DMA_FROM_DEVICE); + __free_page(bi->page); + bi->page = NULL; + bi->page_dma = 0; + rx_ring->rx_stats.realloc_count++; + } else if (bi->page) { + rx_ring->rx_stats.page_reuse_count++; + } + if (!bi->page) { bi->page = alloc_page(GFP_ATOMIC); if (!bi->page) { rx_ring->rx_stats.alloc_page_failed++; goto no_buffers; } - } - - if (!bi->page_dma) { - /* use a half page if we're re-using */ - bi->page_offset ^= PAGE_SIZE / 2; bi->page_dma = dma_map_page(rx_ring->dev, bi->page, - bi->page_offset, - PAGE_SIZE / 2, + 0, + PAGE_SIZE, DMA_FROM_DEVICE); - if (dma_mapping_error(rx_ring->dev, - bi->page_dma)) { + if (dma_mapping_error(rx_ring->dev, bi->page_dma)) { rx_ring->rx_stats.alloc_page_failed++; + __free_page(bi->page); + bi->page = NULL; bi->page_dma = 0; + bi->page_offset = 0; goto no_buffers; } + bi->page_offset = 0; } - dma_sync_single_range_for_device(rx_ring->dev, - bi->dma, - 0, - rx_ring->rx_hdr_len, - DMA_FROM_DEVICE); /* Refresh the desc even if buffer_addrs didn't change * because each write-back erases this info. */ - rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma); + rx_desc->read.pkt_addr = + cpu_to_le64(bi->page_dma + bi->page_offset); rx_desc->read.hdr_addr = cpu_to_le64(bi->dma); i++; if (i == rx_ring->count) i = 0; } + if (rx_ring->next_to_use != i) + i40e_release_rx_desc(rx_ring, i); + + return false; + no_buffers: if (rx_ring->next_to_use != i) i40e_release_rx_desc(rx_ring, i); + + /* make sure to come back via polling to try again after + * allocation failure + */ + return true; } /** * i40evf_alloc_rx_buffers_1buf - Replace used receive buffers; single buffer * @rx_ring: ring to place buffers on * @cleaned_count: number of buffers to replace + * + * Returns true if any errors on allocation **/ -void i40evf_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count) +bool i40evf_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count) { u16 i = rx_ring->next_to_use; union i40e_rx_desc *rx_desc; @@ -716,7 +772,7 @@ void i40evf_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count) /* do nothing if no valid netdev defined */ if (!rx_ring->netdev || !cleaned_count) - return; + return false; while (cleaned_count--) { rx_desc = I40E_RX_DESC(rx_ring, i); @@ -724,8 +780,10 @@ void i40evf_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count) skb = bi->skb; if (!skb) { - skb = netdev_alloc_skb_ip_align(rx_ring->netdev, - rx_ring->rx_buf_len); + skb = __netdev_alloc_skb_ip_align(rx_ring->netdev, + rx_ring->rx_buf_len, + GFP_ATOMIC | + __GFP_NOWARN); if (!skb) { rx_ring->rx_stats.alloc_buff_failed++; goto no_buffers; @@ -743,6 +801,8 @@ void i40evf_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count) if (dma_mapping_error(rx_ring->dev, bi->dma)) { rx_ring->rx_stats.alloc_buff_failed++; bi->dma = 0; + dev_kfree_skb(bi->skb); + bi->skb = NULL; goto no_buffers; } } @@ -754,9 +814,19 @@ void i40evf_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count) i = 0; } + if (rx_ring->next_to_use != i) + i40e_release_rx_desc(rx_ring, i); + + return false; + no_buffers: if (rx_ring->next_to_use != i) i40e_release_rx_desc(rx_ring, i); + + /* make sure to come back via polling to try again after + * allocation failure + */ + return true; } /** @@ -791,16 +861,7 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi, u16 rx_ptype) { struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(rx_ptype); - bool ipv4 = false, ipv6 = false; - bool ipv4_tunnel, ipv6_tunnel; - __wsum rx_udp_csum; - struct iphdr *iph; - __sum16 csum; - - ipv4_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT4_MAC_PAY3) && - (rx_ptype <= I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4); - ipv6_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT6_MAC_PAY3) && - (rx_ptype <= I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4); + bool ipv4, ipv6, ipv4_tunnel, ipv6_tunnel; skb->ip_summed = CHECKSUM_NONE; @@ -816,12 +877,10 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi, if (!(decoded.known && decoded.outer_ip)) return; - if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP && - decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) - ipv4 = true; - else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP && - decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) - ipv6 = true; + ipv4 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) && + (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4); + ipv6 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) && + (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6); if (ipv4 && (rx_error & (BIT(I40E_RX_DESC_ERROR_IPE_SHIFT) | @@ -845,36 +904,17 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi, if (rx_error & BIT(I40E_RX_DESC_ERROR_PPRS_SHIFT)) return; - /* If VXLAN traffic has an outer UDPv4 checksum we need to check - * it in the driver, hardware does not do it for us. - * Since L3L4P bit was set we assume a valid IHL value (>=5) - * so the total length of IPv4 header is IHL*4 bytes - * The UDP_0 bit *may* bet set if the *inner* header is UDP + /* The hardware supported by this driver does not validate outer + * checksums for tunneled VXLAN or GENEVE frames. I don't agree + * with it but the specification states that you "MAY validate", it + * doesn't make it a hard requirement so if we have validated the + * inner checksum report CHECKSUM_UNNECESSARY. */ - if (ipv4_tunnel) { - skb->transport_header = skb->mac_header + - sizeof(struct ethhdr) + - (ip_hdr(skb)->ihl * 4); - - /* Add 4 bytes for VLAN tagged packets */ - skb->transport_header += (skb->protocol == htons(ETH_P_8021Q) || - skb->protocol == htons(ETH_P_8021AD)) - ? VLAN_HLEN : 0; - - if ((ip_hdr(skb)->protocol == IPPROTO_UDP) && - (udp_hdr(skb)->check != 0)) { - rx_udp_csum = udp_csum(skb); - iph = ip_hdr(skb); - csum = csum_tcpudp_magic(iph->saddr, iph->daddr, - (skb->len - - skb_transport_offset(skb)), - IPPROTO_UDP, rx_udp_csum); - - if (udp_hdr(skb)->check != csum) - goto checksum_fail; - - } /* else its GRE and so no outer UDP header */ - } + + ipv4_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT4_MAC_PAY3) && + (rx_ptype <= I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4); + ipv6_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT6_MAC_PAY3) && + (rx_ptype <= I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4); skb->ip_summed = CHECKSUM_UNNECESSARY; skb->csum_level = ipv4_tunnel || ipv6_tunnel; @@ -939,18 +979,19 @@ static inline void i40e_rx_hash(struct i40e_ring *ring, * * Returns true if there's any budget left (e.g. the clean is finished) **/ -static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) +static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, const int budget) { unsigned int total_rx_bytes = 0, total_rx_packets = 0; u16 rx_packet_len, rx_header_len, rx_sph, rx_hbo; u16 cleaned_count = I40E_DESC_UNUSED(rx_ring); - const int current_node = numa_mem_id(); struct i40e_vsi *vsi = rx_ring->vsi; u16 i = rx_ring->next_to_clean; union i40e_rx_desc *rx_desc; u32 rx_error, rx_status; + bool failure = false; u8 rx_ptype; u64 qword; + u32 copysize; do { struct i40e_rx_buffer *rx_bi; @@ -958,7 +999,9 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) u16 vlan_tag; /* return some buffers to hardware, one at a time is too slow */ if (cleaned_count >= I40E_RX_BUFFER_WRITE) { - i40evf_alloc_rx_buffers_ps(rx_ring, cleaned_count); + failure = failure || + i40evf_alloc_rx_buffers_ps(rx_ring, + cleaned_count); cleaned_count = 0; } @@ -976,13 +1019,22 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) * DD bit is set. */ dma_rmb(); + /* sync header buffer for reading */ + dma_sync_single_range_for_cpu(rx_ring->dev, + rx_ring->rx_bi[0].dma, + i * rx_ring->rx_hdr_len, + rx_ring->rx_hdr_len, + DMA_FROM_DEVICE); rx_bi = &rx_ring->rx_bi[i]; skb = rx_bi->skb; if (likely(!skb)) { - skb = netdev_alloc_skb_ip_align(rx_ring->netdev, - rx_ring->rx_hdr_len); + skb = __netdev_alloc_skb_ip_align(rx_ring->netdev, + rx_ring->rx_hdr_len, + GFP_ATOMIC | + __GFP_NOWARN); if (!skb) { rx_ring->rx_stats.alloc_buff_failed++; + failure = true; break; } @@ -990,8 +1042,8 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) skb_record_rx_queue(skb, rx_ring->queue_index); /* we are reusing so sync this buffer for CPU use */ dma_sync_single_range_for_cpu(rx_ring->dev, - rx_bi->dma, - 0, + rx_ring->rx_bi[0].dma, + i * rx_ring->rx_hdr_len, rx_ring->rx_hdr_len, DMA_FROM_DEVICE); } @@ -1009,9 +1061,16 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >> I40E_RXD_QW1_PTYPE_SHIFT; - prefetch(rx_bi->page); + /* sync half-page for reading */ + dma_sync_single_range_for_cpu(rx_ring->dev, + rx_bi->page_dma, + rx_bi->page_offset, + PAGE_SIZE / 2, + DMA_FROM_DEVICE); + prefetch(page_address(rx_bi->page) + rx_bi->page_offset); rx_bi->skb = NULL; cleaned_count++; + copysize = 0; if (rx_hbo || rx_sph) { int len; @@ -1022,38 +1081,50 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) memcpy(__skb_put(skb, len), rx_bi->hdr_buf, len); } else if (skb->len == 0) { int len; + unsigned char *va = page_address(rx_bi->page) + + rx_bi->page_offset; - len = (rx_packet_len > skb_headlen(skb) ? - skb_headlen(skb) : rx_packet_len); - memcpy(__skb_put(skb, len), - rx_bi->page + rx_bi->page_offset, - len); - rx_bi->page_offset += len; + len = min(rx_packet_len, rx_ring->rx_hdr_len); + memcpy(__skb_put(skb, len), va, len); + copysize = len; rx_packet_len -= len; } - /* Get the rest of the data if this was a header split */ if (rx_packet_len) { - skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, - rx_bi->page, - rx_bi->page_offset, - rx_packet_len); - - skb->len += rx_packet_len; - skb->data_len += rx_packet_len; - skb->truesize += rx_packet_len; - - if ((page_count(rx_bi->page) == 1) && - (page_to_nid(rx_bi->page) == current_node)) - get_page(rx_bi->page); - else + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, + rx_bi->page, + rx_bi->page_offset + copysize, + rx_packet_len, I40E_RXBUFFER_2048); + + /* If the page count is more than 2, then both halves + * of the page are used and we need to free it. Do it + * here instead of in the alloc code. Otherwise one + * of the half-pages might be released between now and + * then, and we wouldn't know which one to use. + * Don't call get_page and free_page since those are + * both expensive atomic operations that just change + * the refcount in opposite directions. Just give the + * page to the stack; he can have our refcount. + */ + if (page_count(rx_bi->page) > 2) { + dma_unmap_page(rx_ring->dev, + rx_bi->page_dma, + PAGE_SIZE, + DMA_FROM_DEVICE); rx_bi->page = NULL; + rx_bi->page_dma = 0; + rx_ring->rx_stats.realloc_count++; + } else { + get_page(rx_bi->page); + /* switch to the other half-page here; the + * allocation code programs the right addr + * into HW. If we haven't used this half-page, + * the address won't be changed, and HW can + * just use it next time through. + */ + rx_bi->page_offset ^= PAGE_SIZE / 2; + } - dma_unmap_page(rx_ring->dev, - rx_bi->page_dma, - PAGE_SIZE / 2, - DMA_FROM_DEVICE); - rx_bi->page_dma = 0; } I40E_RX_INCREMENT(rx_ring, i); @@ -1105,7 +1176,7 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) rx_ring->q_vector->rx.total_packets += total_rx_packets; rx_ring->q_vector->rx.total_bytes += total_rx_bytes; - return total_rx_packets; + return failure ? budget : total_rx_packets; } /** @@ -1123,6 +1194,7 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget) union i40e_rx_desc *rx_desc; u32 rx_error, rx_status; u16 rx_packet_len; + bool failure = false; u8 rx_ptype; u64 qword; u16 i; @@ -1133,7 +1205,9 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget) u16 vlan_tag; /* return some buffers to hardware, one at a time is too slow */ if (cleaned_count >= I40E_RX_BUFFER_WRITE) { - i40evf_alloc_rx_buffers_1buf(rx_ring, cleaned_count); + failure = failure || + i40evf_alloc_rx_buffers_1buf(rx_ring, + cleaned_count); cleaned_count = 0; } @@ -1214,7 +1288,7 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget) rx_ring->q_vector->rx.total_packets += total_rx_packets; rx_ring->q_vector->rx.total_bytes += total_rx_bytes; - return total_rx_packets; + return failure ? budget : total_rx_packets; } static u32 i40e_buildreg_itr(const int type, const u16 itr) @@ -1222,7 +1296,9 @@ static u32 i40e_buildreg_itr(const int type, const u16 itr) u32 val; val = I40E_VFINT_DYN_CTLN1_INTENA_MASK | - I40E_VFINT_DYN_CTLN1_CLEARPBA_MASK | + /* Don't clear PBA because that can cause lost interrupts that + * came in while we were cleaning/polling + */ (type << I40E_VFINT_DYN_CTLN1_ITR_INDX_SHIFT) | (itr << I40E_VFINT_DYN_CTLN1_INTERVAL_SHIFT); @@ -1335,7 +1411,8 @@ int i40evf_napi_poll(struct napi_struct *napi, int budget) * budget and be more aggressive about cleaning up the Tx descriptors. */ i40e_for_each_ring(ring, q_vector->tx) { - clean_complete &= i40e_clean_tx_irq(ring, vsi->work_limit); + clean_complete = clean_complete && + i40e_clean_tx_irq(ring, vsi->work_limit); arm_wb = arm_wb || ring->arm_wb; ring->arm_wb = false; } @@ -1359,7 +1436,7 @@ int i40evf_napi_poll(struct napi_struct *napi, int budget) work_done += cleaned; /* if we didn't clean as many as budgeted, we must be done */ - clean_complete &= (budget_per_ring != cleaned); + clean_complete = clean_complete && (budget_per_ring > cleaned); } /* If work not completed, return budget and polling will return */ @@ -1367,7 +1444,7 @@ int i40evf_napi_poll(struct napi_struct *napi, int budget) tx_only: if (arm_wb) { q_vector->tx.ring[0].tx_stats.tx_force_wb++; - i40evf_force_wb(vsi, q_vector); + i40e_enable_wb_on_itr(vsi, q_vector); } return budget; } @@ -1447,13 +1524,23 @@ out: static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb, u8 *hdr_len, u64 *cd_type_cmd_tso_mss) { - u32 cd_cmd, cd_tso_len, cd_mss; - struct ipv6hdr *ipv6h; - struct tcphdr *tcph; - struct iphdr *iph; - u32 l4len; + u64 cd_cmd, cd_tso_len, cd_mss; + union { + struct iphdr *v4; + struct ipv6hdr *v6; + unsigned char *hdr; + } ip; + union { + struct tcphdr *tcp; + struct udphdr *udp; + unsigned char *hdr; + } l4; + u32 paylen, l4_offset; int err; + if (skb->ip_summed != CHECKSUM_PARTIAL) + return 0; + if (!skb_is_gso(skb)) return 0; @@ -1461,35 +1548,60 @@ static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb, if (err < 0) return err; - iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb); - ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb); - - if (iph->version == 4) { - tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb); - iph->tot_len = 0; - iph->check = 0; - tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, - 0, IPPROTO_TCP, 0); - } else if (ipv6h->version == 6) { - tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb); - ipv6h->payload_len = 0; - tcph->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, - 0, IPPROTO_TCP, 0); + ip.hdr = skb_network_header(skb); + l4.hdr = skb_transport_header(skb); + + /* initialize outer IP header fields */ + if (ip.v4->version == 4) { + ip.v4->tot_len = 0; + ip.v4->check = 0; + } else { + ip.v6->payload_len = 0; + } + + if (skb_shinfo(skb)->gso_type & (SKB_GSO_UDP_TUNNEL | SKB_GSO_GRE | + SKB_GSO_UDP_TUNNEL_CSUM)) { + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM) { + /* determine offset of outer transport header */ + l4_offset = l4.hdr - skb->data; + + /* remove payload length from outer checksum */ + paylen = (__force u16)l4.udp->check; + paylen += ntohs(1) * (u16)~(skb->len - l4_offset); + l4.udp->check = ~csum_fold((__force __wsum)paylen); + } + + /* reset pointers to inner headers */ + ip.hdr = skb_inner_network_header(skb); + l4.hdr = skb_inner_transport_header(skb); + + /* initialize inner IP header fields */ + if (ip.v4->version == 4) { + ip.v4->tot_len = 0; + ip.v4->check = 0; + } else { + ip.v6->payload_len = 0; + } } - l4len = skb->encapsulation ? inner_tcp_hdrlen(skb) : tcp_hdrlen(skb); - *hdr_len = (skb->encapsulation - ? (skb_inner_transport_header(skb) - skb->data) - : skb_transport_offset(skb)) + l4len; + /* determine offset of inner transport header */ + l4_offset = l4.hdr - skb->data; + + /* remove payload length from inner checksum */ + paylen = (__force u16)l4.tcp->check; + paylen += ntohs(1) * (u16)~(skb->len - l4_offset); + l4.tcp->check = ~csum_fold((__force __wsum)paylen); + + /* compute length of segmentation header */ + *hdr_len = (l4.tcp->doff * 4) + l4_offset; /* find the field values */ cd_cmd = I40E_TX_CTX_DESC_TSO; cd_tso_len = skb->len - *hdr_len; cd_mss = skb_shinfo(skb)->gso_size; - *cd_type_cmd_tso_mss |= ((u64)cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) | - ((u64)cd_tso_len << - I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) | - ((u64)cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT); + *cd_type_cmd_tso_mss |= (cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) | + (cd_tso_len << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) | + (cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT); return 1; } @@ -1499,129 +1611,157 @@ static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb, * @tx_flags: pointer to Tx flags currently set * @td_cmd: Tx descriptor command bits to set * @td_offset: Tx descriptor header offsets to set + * @tx_ring: Tx descriptor ring * @cd_tunneling: ptr to context desc bits **/ -static void i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags, - u32 *td_cmd, u32 *td_offset, - struct i40e_ring *tx_ring, - u32 *cd_tunneling) +static int i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags, + u32 *td_cmd, u32 *td_offset, + struct i40e_ring *tx_ring, + u32 *cd_tunneling) { - struct ipv6hdr *this_ipv6_hdr; - unsigned int this_tcp_hdrlen; - struct iphdr *this_ip_hdr; - u32 network_hdr_len; - u8 l4_hdr = 0; - struct udphdr *oudph; - struct iphdr *oiph; - u32 l4_tunnel = 0; + union { + struct iphdr *v4; + struct ipv6hdr *v6; + unsigned char *hdr; + } ip; + union { + struct tcphdr *tcp; + struct udphdr *udp; + unsigned char *hdr; + } l4; + unsigned char *exthdr; + u32 offset, cmd = 0, tunnel = 0; + __be16 frag_off; + u8 l4_proto = 0; + + if (skb->ip_summed != CHECKSUM_PARTIAL) + return 0; + + ip.hdr = skb_network_header(skb); + l4.hdr = skb_transport_header(skb); + + /* compute outer L2 header size */ + offset = ((ip.hdr - skb->data) / 2) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT; if (skb->encapsulation) { - switch (ip_hdr(skb)->protocol) { + /* define outer network header type */ + if (*tx_flags & I40E_TX_FLAGS_IPV4) { + tunnel |= (*tx_flags & I40E_TX_FLAGS_TSO) ? + I40E_TX_CTX_EXT_IP_IPV4 : + I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM; + + l4_proto = ip.v4->protocol; + } else if (*tx_flags & I40E_TX_FLAGS_IPV6) { + tunnel |= I40E_TX_CTX_EXT_IP_IPV6; + + exthdr = ip.hdr + sizeof(*ip.v6); + l4_proto = ip.v6->nexthdr; + if (l4.hdr != exthdr) + ipv6_skip_exthdr(skb, exthdr - skb->data, + &l4_proto, &frag_off); + } + + /* compute outer L3 header size */ + tunnel |= ((l4.hdr - ip.hdr) / 4) << + I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT; + + /* switch IP header pointer from outer to inner header */ + ip.hdr = skb_inner_network_header(skb); + + /* define outer transport */ + switch (l4_proto) { case IPPROTO_UDP: - oudph = udp_hdr(skb); - oiph = ip_hdr(skb); - l4_tunnel = I40E_TXD_CTX_UDP_TUNNELING; + tunnel |= I40E_TXD_CTX_UDP_TUNNELING; + *tx_flags |= I40E_TX_FLAGS_VXLAN_TUNNEL; + break; + case IPPROTO_GRE: + tunnel |= I40E_TXD_CTX_GRE_TUNNELING; *tx_flags |= I40E_TX_FLAGS_VXLAN_TUNNEL; break; default: - return; - } - network_hdr_len = skb_inner_network_header_len(skb); - this_ip_hdr = inner_ip_hdr(skb); - this_ipv6_hdr = inner_ipv6_hdr(skb); - this_tcp_hdrlen = inner_tcp_hdrlen(skb); - - if (*tx_flags & I40E_TX_FLAGS_IPV4) { - if (*tx_flags & I40E_TX_FLAGS_TSO) { - *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4; - ip_hdr(skb)->check = 0; - } else { - *cd_tunneling |= - I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM; - } - } else if (*tx_flags & I40E_TX_FLAGS_IPV6) { - *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6; if (*tx_flags & I40E_TX_FLAGS_TSO) - ip_hdr(skb)->check = 0; - } + return -1; - /* Now set the ctx descriptor fields */ - *cd_tunneling |= (skb_network_header_len(skb) >> 2) << - I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT | - l4_tunnel | - ((skb_inner_network_offset(skb) - - skb_transport_offset(skb)) >> 1) << - I40E_TXD_CTX_QW0_NATLEN_SHIFT; - if (this_ip_hdr->version == 6) { - *tx_flags &= ~I40E_TX_FLAGS_IPV4; - *tx_flags |= I40E_TX_FLAGS_IPV6; + skb_checksum_help(skb); + return 0; } - if ((tx_ring->flags & I40E_TXR_FLAGS_OUTER_UDP_CSUM) && - (l4_tunnel == I40E_TXD_CTX_UDP_TUNNELING) && - (*cd_tunneling & I40E_TXD_CTX_QW0_EXT_IP_MASK)) { - oudph->check = ~csum_tcpudp_magic(oiph->saddr, - oiph->daddr, - (skb->len - skb_transport_offset(skb)), - IPPROTO_UDP, 0); - *cd_tunneling |= I40E_TXD_CTX_QW0_L4T_CS_MASK; - } - } else { - network_hdr_len = skb_network_header_len(skb); - this_ip_hdr = ip_hdr(skb); - this_ipv6_hdr = ipv6_hdr(skb); - this_tcp_hdrlen = tcp_hdrlen(skb); + /* compute tunnel header size */ + tunnel |= ((ip.hdr - l4.hdr) / 2) << + I40E_TXD_CTX_QW0_NATLEN_SHIFT; + + /* indicate if we need to offload outer UDP header */ + if ((*tx_flags & I40E_TX_FLAGS_TSO) && + (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM)) + tunnel |= I40E_TXD_CTX_QW0_L4T_CS_MASK; + + /* record tunnel offload values */ + *cd_tunneling |= tunnel; + + /* switch L4 header pointer from outer to inner */ + l4.hdr = skb_inner_transport_header(skb); + l4_proto = 0; + + /* reset type as we transition from outer to inner headers */ + *tx_flags &= ~(I40E_TX_FLAGS_IPV4 | I40E_TX_FLAGS_IPV6); + if (ip.v4->version == 4) + *tx_flags |= I40E_TX_FLAGS_IPV4; + if (ip.v6->version == 6) + *tx_flags |= I40E_TX_FLAGS_IPV6; } /* Enable IP checksum offloads */ if (*tx_flags & I40E_TX_FLAGS_IPV4) { - l4_hdr = this_ip_hdr->protocol; + l4_proto = ip.v4->protocol; /* the stack computes the IP header already, the only time we * need the hardware to recompute it is in the case of TSO. */ - if (*tx_flags & I40E_TX_FLAGS_TSO) { - *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM; - this_ip_hdr->check = 0; - } else { - *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4; - } - /* Now set the td_offset for IP header length */ - *td_offset = (network_hdr_len >> 2) << - I40E_TX_DESC_LENGTH_IPLEN_SHIFT; + cmd |= (*tx_flags & I40E_TX_FLAGS_TSO) ? + I40E_TX_DESC_CMD_IIPT_IPV4_CSUM : + I40E_TX_DESC_CMD_IIPT_IPV4; } else if (*tx_flags & I40E_TX_FLAGS_IPV6) { - l4_hdr = this_ipv6_hdr->nexthdr; - *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV6; - /* Now set the td_offset for IP header length */ - *td_offset = (network_hdr_len >> 2) << - I40E_TX_DESC_LENGTH_IPLEN_SHIFT; + cmd |= I40E_TX_DESC_CMD_IIPT_IPV6; + + exthdr = ip.hdr + sizeof(*ip.v6); + l4_proto = ip.v6->nexthdr; + if (l4.hdr != exthdr) + ipv6_skip_exthdr(skb, exthdr - skb->data, + &l4_proto, &frag_off); } - /* words in MACLEN + dwords in IPLEN + dwords in L4Len */ - *td_offset |= (skb_network_offset(skb) >> 1) << - I40E_TX_DESC_LENGTH_MACLEN_SHIFT; + + /* compute inner L3 header size */ + offset |= ((l4.hdr - ip.hdr) / 4) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT; /* Enable L4 checksum offloads */ - switch (l4_hdr) { + switch (l4_proto) { case IPPROTO_TCP: /* enable checksum offloads */ - *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP; - *td_offset |= (this_tcp_hdrlen >> 2) << - I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; + cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP; + offset |= l4.tcp->doff << I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; break; case IPPROTO_SCTP: /* enable SCTP checksum offload */ - *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP; - *td_offset |= (sizeof(struct sctphdr) >> 2) << - I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; + cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP; + offset |= (sizeof(struct sctphdr) >> 2) << + I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; break; case IPPROTO_UDP: /* enable UDP checksum offload */ - *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP; - *td_offset |= (sizeof(struct udphdr) >> 2) << - I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; + cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP; + offset |= (sizeof(struct udphdr) >> 2) << + I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; break; default: - break; + if (*tx_flags & I40E_TX_FLAGS_TSO) + return -1; + skb_checksum_help(skb); + return 0; } + + *td_cmd |= cmd; + *td_offset |= offset; + + return 1; } /** @@ -1656,59 +1796,70 @@ static void i40e_create_tx_ctx(struct i40e_ring *tx_ring, } /** - * i40e_chk_linearize - Check if there are more than 8 fragments per packet + * __i40evf_chk_linearize - Check if there are more than 8 buffers per packet * @skb: send buffer - * @tx_flags: collected send information * - * Note: Our HW can't scatter-gather more than 8 fragments to build - * a packet on the wire and so we need to figure out the cases where we - * need to linearize the skb. + * Note: Our HW can't DMA more than 8 buffers to build a packet on the wire + * and so we need to figure out the cases where we need to linearize the skb. + * + * For TSO we need to count the TSO header and segment payload separately. + * As such we need to check cases where we have 7 fragments or more as we + * can potentially require 9 DMA transactions, 1 for the TSO header, 1 for + * the segment payload in the first descriptor, and another 7 for the + * fragments. **/ -static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags) +bool __i40evf_chk_linearize(struct sk_buff *skb) { - struct skb_frag_struct *frag; - bool linearize = false; - unsigned int size = 0; - u16 num_frags; - u16 gso_segs; + const struct skb_frag_struct *frag, *stale; + int nr_frags, sum; - num_frags = skb_shinfo(skb)->nr_frags; - gso_segs = skb_shinfo(skb)->gso_segs; + /* no need to check if number of frags is less than 7 */ + nr_frags = skb_shinfo(skb)->nr_frags; + if (nr_frags < (I40E_MAX_BUFFER_TXD - 1)) + return false; - if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO)) { - u16 j = 0; + /* We need to walk through the list and validate that each group + * of 6 fragments totals at least gso_size. However we don't need + * to perform such validation on the last 6 since the last 6 cannot + * inherit any data from a descriptor after them. + */ + nr_frags -= I40E_MAX_BUFFER_TXD - 2; + frag = &skb_shinfo(skb)->frags[0]; + + /* Initialize size to the negative value of gso_size minus 1. We + * use this as the worst case scenerio in which the frag ahead + * of us only provides one byte which is why we are limited to 6 + * descriptors for a single transmit as the header and previous + * fragment are already consuming 2 descriptors. + */ + sum = 1 - skb_shinfo(skb)->gso_size; - if (num_frags < (I40E_MAX_BUFFER_TXD)) - goto linearize_chk_done; - /* try the simple math, if we have too many frags per segment */ - if (DIV_ROUND_UP((num_frags + gso_segs), gso_segs) > - I40E_MAX_BUFFER_TXD) { - linearize = true; - goto linearize_chk_done; - } - frag = &skb_shinfo(skb)->frags[0]; - /* we might still have more fragments per segment */ - do { - size += skb_frag_size(frag); - frag++; j++; - if ((size >= skb_shinfo(skb)->gso_size) && - (j < I40E_MAX_BUFFER_TXD)) { - size = (size % skb_shinfo(skb)->gso_size); - j = (size) ? 1 : 0; - } - if (j == I40E_MAX_BUFFER_TXD) { - linearize = true; - break; - } - num_frags--; - } while (num_frags); - } else { - if (num_frags >= I40E_MAX_BUFFER_TXD) - linearize = true; + /* Add size of frags 0 through 4 to create our initial sum */ + sum += skb_frag_size(frag++); + sum += skb_frag_size(frag++); + sum += skb_frag_size(frag++); + sum += skb_frag_size(frag++); + sum += skb_frag_size(frag++); + + /* Walk through fragments adding latest fragment, testing it, and + * then removing stale fragments from the sum. + */ + stale = &skb_shinfo(skb)->frags[0]; + for (;;) { + sum += skb_frag_size(frag++); + + /* if sum is negative we failed to make sufficient progress */ + if (sum < 0) + return true; + + /* use pre-decrement to avoid processing last fragment */ + if (!--nr_frags) + break; + + sum -= skb_frag_size(stale++); } -linearize_chk_done: - return linearize; + return false; } /** @@ -1718,7 +1869,7 @@ linearize_chk_done: * * Returns -EBUSY if a stop is needed, else 0 **/ -static inline int __i40evf_maybe_stop_tx(struct i40e_ring *tx_ring, int size) +int __i40evf_maybe_stop_tx(struct i40e_ring *tx_ring, int size) { netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index); /* Memory barrier before checking head and tail */ @@ -1734,20 +1885,6 @@ static inline int __i40evf_maybe_stop_tx(struct i40e_ring *tx_ring, int size) return 0; } -/** - * i40evf_maybe_stop_tx - 1st level check for tx stop conditions - * @tx_ring: the ring to be checked - * @size: the size buffer we want to assure is available - * - * Returns 0 if stop is not needed - **/ -static inline int i40evf_maybe_stop_tx(struct i40e_ring *tx_ring, int size) -{ - if (likely(I40E_DESC_UNUSED(tx_ring) >= size)) - return 0; - return __i40evf_maybe_stop_tx(tx_ring, size); -} - /** * i40evf_tx_map - Build the Tx descriptor * @tx_ring: ring to send buffer on @@ -1863,7 +2000,7 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev, tx_ring->queue_index), first->bytecount); - i40evf_maybe_stop_tx(tx_ring, DESC_NEEDED); + i40e_maybe_stop_tx(tx_ring, DESC_NEEDED); /* Algorithm to optimize tail and RS bit setting: * if xmit_more is supported @@ -1945,38 +2082,6 @@ dma_error: tx_ring->next_to_use = i; } -/** - * i40evf_xmit_descriptor_count - calculate number of tx descriptors needed - * @skb: send buffer - * @tx_ring: ring to send buffer on - * - * Returns number of data descriptors needed for this skb. Returns 0 to indicate - * there is not enough descriptors available in this ring since we need at least - * one descriptor. - **/ -static inline int i40evf_xmit_descriptor_count(struct sk_buff *skb, - struct i40e_ring *tx_ring) -{ - unsigned int f; - int count = 0; - - /* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD, - * + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD, - * + 4 desc gap to avoid the cache line where head is, - * + 1 desc for context descriptor, - * otherwise try next time - */ - for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) - count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size); - - count += TXD_USE_COUNT(skb_headlen(skb)); - if (i40evf_maybe_stop_tx(tx_ring, count + 4 + 1)) { - tx_ring->tx_stats.tx_busy++; - return 0; - } - return count; -} - /** * i40e_xmit_frame_ring - Sends buffer on Tx ring * @skb: send buffer @@ -1995,13 +2100,29 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, __be16 protocol; u32 td_cmd = 0; u8 hdr_len = 0; - int tso; + int tso, count; /* prefetch the data, we'll need it later */ prefetch(skb->data); - if (0 == i40evf_xmit_descriptor_count(skb, tx_ring)) + count = i40e_xmit_descriptor_count(skb); + if (i40e_chk_linearize(skb, count)) { + if (__skb_linearize(skb)) + goto out_drop; + count = TXD_USE_COUNT(skb->len); + tx_ring->tx_stats.tx_linearize++; + } + + /* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD, + * + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD, + * + 4 desc gap to avoid the cache line where head is, + * + 1 desc for context descriptor, + * otherwise try next time + */ + if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) { + tx_ring->tx_stats.tx_busy++; return NETDEV_TX_BUSY; + } /* prepare the xmit flags */ if (i40evf_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags)) @@ -2026,24 +2147,17 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, else if (tso) tx_flags |= I40E_TX_FLAGS_TSO; - if (i40e_chk_linearize(skb, tx_flags)) { - if (skb_linearize(skb)) - goto out_drop; - tx_ring->tx_stats.tx_linearize++; - } + /* Always offload the checksum, since it's in the data descriptor */ + tso = i40e_tx_enable_csum(skb, &tx_flags, &td_cmd, &td_offset, + tx_ring, &cd_tunneling); + if (tso < 0) + goto out_drop; + skb_tx_timestamp(skb); /* always enable CRC insertion offload */ td_cmd |= I40E_TX_DESC_CMD_ICRC; - /* Always offload the checksum, since it's in the data descriptor */ - if (skb->ip_summed == CHECKSUM_PARTIAL) { - tx_flags |= I40E_TX_FLAGS_CSUM; - - i40e_tx_enable_csum(skb, &tx_flags, &td_cmd, &td_offset, - tx_ring, &cd_tunneling); - } - i40e_create_tx_ctx(tx_ring, cd_type_cmd_tso_mss, cd_tunneling, cd_l2tag2); diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h index e29bb3e86..0429553fe 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver - * Copyright(c) 2013 - 2014 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -153,7 +153,6 @@ enum i40e_dyn_idx_t { #define DESC_NEEDED (MAX_SKB_FRAGS + 4) #define I40E_MIN_DESC_PENDING 4 -#define I40E_TX_FLAGS_CSUM BIT(0) #define I40E_TX_FLAGS_HW_VLAN BIT(1) #define I40E_TX_FLAGS_SW_VLAN BIT(2) #define I40E_TX_FLAGS_TSO BIT(3) @@ -202,12 +201,15 @@ struct i40e_tx_queue_stats { u64 tx_done_old; u64 tx_linearize; u64 tx_force_wb; + u64 tx_lost_interrupt; }; struct i40e_rx_queue_stats { u64 non_eop_descs; u64 alloc_page_failed; u64 alloc_buff_failed; + u64 page_reuse_count; + u64 realloc_count; }; enum i40e_ring_state_t { @@ -253,7 +255,6 @@ struct i40e_ring { #define I40E_RX_DTYPE_NO_SPLIT 0 #define I40E_RX_DTYPE_HEADER_SPLIT 1 #define I40E_RX_DTYPE_SPLIT_ALWAYS 2 - u8 hsplit; #define I40E_RX_SPLIT_L2 0x1 #define I40E_RX_SPLIT_IP 0x2 #define I40E_RX_SPLIT_TCP_UDP 0x4 @@ -273,7 +274,6 @@ struct i40e_ring { u16 flags; #define I40E_TXR_FLAGS_WB_ON_ITR BIT(0) -#define I40E_TXR_FLAGS_OUTER_UDP_CSUM BIT(1) /* stats structs */ struct i40e_queue_stats stats; @@ -313,8 +313,8 @@ struct i40e_ring_container { #define i40e_for_each_ring(pos, head) \ for (pos = (head).ring; pos != NULL; pos = pos->next) -void i40evf_alloc_rx_buffers_ps(struct i40e_ring *rxr, u16 cleaned_count); -void i40evf_alloc_rx_buffers_1buf(struct i40e_ring *rxr, u16 cleaned_count); +bool i40evf_alloc_rx_buffers_ps(struct i40e_ring *rxr, u16 cleaned_count); +bool i40evf_alloc_rx_buffers_1buf(struct i40e_ring *rxr, u16 cleaned_count); void i40evf_alloc_rx_headers(struct i40e_ring *rxr); netdev_tx_t i40evf_xmit_frame(struct sk_buff *skb, struct net_device *netdev); void i40evf_clean_tx_ring(struct i40e_ring *tx_ring); @@ -324,7 +324,10 @@ int i40evf_setup_rx_descriptors(struct i40e_ring *rx_ring); void i40evf_free_tx_resources(struct i40e_ring *tx_ring); void i40evf_free_rx_resources(struct i40e_ring *rx_ring); int i40evf_napi_poll(struct napi_struct *napi, int budget); -u32 i40evf_get_tx_pending(struct i40e_ring *ring); +void i40evf_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector); +u32 i40evf_get_tx_pending(struct i40e_ring *ring, bool in_sw); +int __i40evf_maybe_stop_tx(struct i40e_ring *tx_ring, int size); +bool __i40evf_chk_linearize(struct sk_buff *skb); /** * i40e_get_head - Retrieve head from head writeback @@ -339,4 +342,67 @@ static inline u32 i40e_get_head(struct i40e_ring *tx_ring) return le32_to_cpu(*(volatile __le32 *)head); } + +/** + * i40e_xmit_descriptor_count - calculate number of Tx descriptors needed + * @skb: send buffer + * @tx_ring: ring to send buffer on + * + * Returns number of data descriptors needed for this skb. Returns 0 to indicate + * there is not enough descriptors available in this ring since we need at least + * one descriptor. + **/ +static inline int i40e_xmit_descriptor_count(struct sk_buff *skb) +{ + const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; + unsigned int nr_frags = skb_shinfo(skb)->nr_frags; + int count = 0, size = skb_headlen(skb); + + for (;;) { + count += TXD_USE_COUNT(size); + + if (!nr_frags--) + break; + + size = skb_frag_size(frag++); + } + + return count; +} + +/** + * i40e_maybe_stop_tx - 1st level check for Tx stop conditions + * @tx_ring: the ring to be checked + * @size: the size buffer we want to assure is available + * + * Returns 0 if stop is not needed + **/ +static inline int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size) +{ + if (likely(I40E_DESC_UNUSED(tx_ring) >= size)) + return 0; + return __i40evf_maybe_stop_tx(tx_ring, size); +} + +/** + * i40e_chk_linearize - Check if there are more than 8 fragments per packet + * @skb: send buffer + * @count: number of buffers used + * + * Note: Our HW can't scatter-gather more than 8 fragments to build + * a packet on the wire and so we need to figure out the cases where we + * need to linearize the skb. + **/ +static inline bool i40e_chk_linearize(struct sk_buff *skb, int count) +{ + /* Both TSO and single send will work if count is less than 8 */ + if (likely(count < I40E_MAX_BUFFER_TXD)) + return false; + + if (skb_is_gso(skb)) + return __i40evf_chk_linearize(skb); + + /* we can support up to 8 data buffers for a single send */ + return count != I40E_MAX_BUFFER_TXD; +} #endif /* _I40E_TXRX_H_ */ diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h index be1b72b93..e657eccd2 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf.h +++ b/drivers/net/ethernet/intel/i40evf/i40evf.h @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver - * Copyright(c) 2013 - 2014 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -173,6 +173,7 @@ enum i40evf_state_t { __I40EVF_RESETTING, /* in reset */ /* Below here, watchdog is running */ __I40EVF_DOWN, /* ready, can be opened */ + __I40EVF_DOWN_PENDING, /* descending, waiting for watchdog */ __I40EVF_TESTING, /* in ethtool self-test */ __I40EVF_RUNNING, /* opened, working */ }; @@ -273,6 +274,9 @@ struct i40evf_adapter { }; +/* Ethtool Private Flags */ +#define I40EVF_PRIV_FLAGS_PS BIT(0) + /* needed by i40evf_ethtool.c */ extern char i40evf_driver_name[]; extern const char i40evf_driver_version[]; @@ -280,6 +284,7 @@ extern const char i40evf_driver_version[]; int i40evf_up(struct i40evf_adapter *adapter); void i40evf_down(struct i40evf_adapter *adapter); int i40evf_process_config(struct i40evf_adapter *adapter); +void i40evf_schedule_reset(struct i40evf_adapter *adapter); void i40evf_reset(struct i40evf_adapter *adapter); void i40evf_set_ethtool_ops(struct net_device *netdev); void i40evf_update_stats(struct i40evf_adapter *adapter); diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c index a4c9feb58..dd4430aae 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver - * Copyright(c) 2013 - 2015 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -63,6 +63,12 @@ static const struct i40evf_stats i40evf_gstrings_stats[] = { #define I40EVF_STATS_LEN(_dev) \ (I40EVF_GLOBAL_STATS_LEN + I40EVF_QUEUE_STATS_LEN(_dev)) +static const char i40evf_priv_flags_strings[][ETH_GSTRING_LEN] = { + "packet-split", +}; + +#define I40EVF_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40evf_priv_flags_strings) + /** * i40evf_get_settings - Get Link Speed and Duplex settings * @netdev: network interface device structure @@ -97,6 +103,8 @@ static int i40evf_get_sset_count(struct net_device *netdev, int sset) { if (sset == ETH_SS_STATS) return I40EVF_STATS_LEN(netdev); + else if (sset == ETH_SS_PRIV_FLAGS) + return I40EVF_PRIV_FLAGS_STR_LEN; else return -EINVAL; } @@ -162,6 +170,12 @@ static void i40evf_get_strings(struct net_device *netdev, u32 sset, u8 *data) snprintf(p, ETH_GSTRING_LEN, "rx-%u.bytes", i); p += ETH_GSTRING_LEN; } + } else if (sset == ETH_SS_PRIV_FLAGS) { + for (i = 0; i < I40EVF_PRIV_FLAGS_STR_LEN; i++) { + memcpy(data, i40evf_priv_flags_strings[i], + ETH_GSTRING_LEN); + data += ETH_GSTRING_LEN; + } } } @@ -211,6 +225,7 @@ static void i40evf_get_drvinfo(struct net_device *netdev, strlcpy(drvinfo->version, i40evf_driver_version, 32); strlcpy(drvinfo->fw_version, "N/A", 4); strlcpy(drvinfo->bus_info, pci_name(adapter->pdev), 32); + drvinfo->n_priv_flags = I40EVF_PRIV_FLAGS_STR_LEN; } /** @@ -459,6 +474,7 @@ static int i40evf_set_rss_hash_opt(struct i40evf_adapter *adapter, struct ethtool_rxnfc *nfc) { struct i40e_hw *hw = &adapter->hw; + u32 flags = adapter->vf_res->vf_offload_flags; u64 hena = (u64)rd32(hw, I40E_VFQF_HENA(0)) | ((u64)rd32(hw, I40E_VFQF_HENA(1)) << 32); @@ -477,54 +493,50 @@ static int i40evf_set_rss_hash_opt(struct i40evf_adapter *adapter, switch (nfc->flow_type) { case TCP_V4_FLOW: - switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { - case 0: - hena &= ~BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP); - break; - case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + if (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { + if (flags & I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2) + hena |= + BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK); + hena |= BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP); - break; - default: + } else { return -EINVAL; } break; case TCP_V6_FLOW: - switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { - case 0: - hena &= ~BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_TCP); - break; - case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + if (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { + if (flags & I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2) + hena |= + BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK); + hena |= BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_TCP); - break; - default: + } else { return -EINVAL; } break; case UDP_V4_FLOW: - switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { - case 0: - hena &= ~(BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_UDP) | - BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV4)); - break; - case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + if (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { + if (flags & I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2) + hena |= + BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP) | + BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP); + hena |= (BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_UDP) | BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV4)); - break; - default: + } else { return -EINVAL; } break; case UDP_V6_FLOW: - switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { - case 0: - hena &= ~(BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_UDP) | - BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV6)); - break; - case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + if (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { + if (flags & I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2) + hena |= + BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP) | + BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP); + hena |= (BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_UDP) | BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV6)); - break; - default: + } else { return -EINVAL; } break; @@ -713,6 +725,54 @@ static int i40evf_set_rxfh(struct net_device *netdev, const u32 *indir, I40EVF_HLUT_ARRAY_SIZE); } +/** + * i40evf_get_priv_flags - report device private flags + * @dev: network interface device structure + * + * The get string set count and the string set should be matched for each + * flag returned. Add new strings for each flag to the i40e_priv_flags_strings + * array. + * + * Returns a u32 bitmap of flags. + **/ +static u32 i40evf_get_priv_flags(struct net_device *dev) +{ + struct i40evf_adapter *adapter = netdev_priv(dev); + u32 ret_flags = 0; + + ret_flags |= adapter->flags & I40EVF_FLAG_RX_PS_ENABLED ? + I40EVF_PRIV_FLAGS_PS : 0; + + return ret_flags; +} + +/** + * i40evf_set_priv_flags - set private flags + * @dev: network interface device structure + * @flags: bit flags to be set + **/ +static int i40evf_set_priv_flags(struct net_device *dev, u32 flags) +{ + struct i40evf_adapter *adapter = netdev_priv(dev); + bool reset_required = false; + + if ((flags & I40EVF_PRIV_FLAGS_PS) && + !(adapter->flags & I40EVF_FLAG_RX_PS_ENABLED)) { + adapter->flags |= I40EVF_FLAG_RX_PS_ENABLED; + reset_required = true; + } else if (!(flags & I40EVF_PRIV_FLAGS_PS) && + (adapter->flags & I40EVF_FLAG_RX_PS_ENABLED)) { + adapter->flags &= ~I40EVF_FLAG_RX_PS_ENABLED; + reset_required = true; + } + + /* if needed, issue reset to cause things to take effect */ + if (reset_required) + i40evf_schedule_reset(adapter); + + return 0; +} + static const struct ethtool_ops i40evf_ethtool_ops = { .get_settings = i40evf_get_settings, .get_drvinfo = i40evf_get_drvinfo, @@ -722,6 +782,8 @@ static const struct ethtool_ops i40evf_ethtool_ops = { .get_strings = i40evf_get_strings, .get_ethtool_stats = i40evf_get_ethtool_stats, .get_sset_count = i40evf_get_sset_count, + .get_priv_flags = i40evf_get_priv_flags, + .set_priv_flags = i40evf_set_priv_flags, .get_msglevel = i40evf_get_msglevel, .set_msglevel = i40evf_set_msglevel, .get_coalesce = i40evf_get_coalesce, diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 94da913b1..4b70aae2f 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver - * Copyright(c) 2013 - 2015 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -32,13 +32,13 @@ static int i40evf_close(struct net_device *netdev); char i40evf_driver_name[] = "i40evf"; static const char i40evf_driver_string[] = - "Intel(R) XL710/X710 Virtual Function Network Driver"; + "Intel(R) 40-10 Gigabit Virtual Function Network Driver"; #define DRV_KERN "-k" #define DRV_VERSION_MAJOR 1 #define DRV_VERSION_MINOR 4 -#define DRV_VERSION_BUILD 4 +#define DRV_VERSION_BUILD 15 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \ __stringify(DRV_VERSION_MINOR) "." \ __stringify(DRV_VERSION_BUILD) \ @@ -69,6 +69,8 @@ MODULE_DESCRIPTION("Intel(R) XL710 X710 Virtual Function Network Driver"); MODULE_LICENSE("GPL"); MODULE_VERSION(DRV_VERSION); +static struct workqueue_struct *i40evf_wq; + /** * i40evf_allocate_dma_mem_d - OS specific memory alloc for shared code * @hw: pointer to the HW structure @@ -170,6 +172,19 @@ void i40evf_debug_d(void *hw, u32 mask, char *fmt_str, ...) pr_info("%s", buf); } +/** + * i40evf_schedule_reset - Set the flags and schedule a reset event + * @adapter: board private structure + **/ +void i40evf_schedule_reset(struct i40evf_adapter *adapter) +{ + if (!(adapter->flags & + (I40EVF_FLAG_RESET_PENDING | I40EVF_FLAG_RESET_NEEDED))) { + adapter->flags |= I40EVF_FLAG_RESET_NEEDED; + schedule_work(&adapter->reset_task); + } +} + /** * i40evf_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure @@ -179,11 +194,7 @@ static void i40evf_tx_timeout(struct net_device *netdev) struct i40evf_adapter *adapter = netdev_priv(netdev); adapter->tx_timeout_count++; - if (!(adapter->flags & (I40EVF_FLAG_RESET_PENDING | - I40EVF_FLAG_RESET_NEEDED))) { - adapter->flags |= I40EVF_FLAG_RESET_NEEDED; - schedule_work(&adapter->reset_task); - } + i40evf_schedule_reset(adapter); } /** @@ -636,35 +647,22 @@ static void i40evf_configure_rx(struct i40evf_adapter *adapter) int rx_buf_len; - adapter->flags &= ~I40EVF_FLAG_RX_PS_CAPABLE; - adapter->flags |= I40EVF_FLAG_RX_1BUF_CAPABLE; - - /* Decide whether to use packet split mode or not */ - if (netdev->mtu > ETH_DATA_LEN) { - if (adapter->flags & I40EVF_FLAG_RX_PS_CAPABLE) - adapter->flags |= I40EVF_FLAG_RX_PS_ENABLED; - else - adapter->flags &= ~I40EVF_FLAG_RX_PS_ENABLED; - } else { - if (adapter->flags & I40EVF_FLAG_RX_1BUF_CAPABLE) - adapter->flags &= ~I40EVF_FLAG_RX_PS_ENABLED; - else - adapter->flags |= I40EVF_FLAG_RX_PS_ENABLED; - } - /* Set the RX buffer length according to the mode */ - if (adapter->flags & I40EVF_FLAG_RX_PS_ENABLED) { - rx_buf_len = I40E_RX_HDR_SIZE; - } else { - if (netdev->mtu <= ETH_DATA_LEN) - rx_buf_len = I40EVF_RXBUFFER_2048; - else - rx_buf_len = ALIGN(max_frame, 1024); - } + if (adapter->flags & I40EVF_FLAG_RX_PS_ENABLED || + netdev->mtu <= ETH_DATA_LEN) + rx_buf_len = I40EVF_RXBUFFER_2048; + else + rx_buf_len = ALIGN(max_frame, 1024); for (i = 0; i < adapter->num_active_queues; i++) { adapter->rx_rings[i].tail = hw->hw_addr + I40E_QRX_TAIL1(i); adapter->rx_rings[i].rx_buf_len = rx_buf_len; + if (adapter->flags & I40EVF_FLAG_RX_PS_ENABLED) { + set_ring_ps_enabled(&adapter->rx_rings[i]); + adapter->rx_rings[i].rx_hdr_len = I40E_RX_HDR_SIZE; + } else { + clear_ring_ps_enabled(&adapter->rx_rings[i]); + } } } @@ -1001,7 +999,12 @@ static void i40evf_configure(struct i40evf_adapter *adapter) for (i = 0; i < adapter->num_active_queues; i++) { struct i40e_ring *ring = &adapter->rx_rings[i]; + if (adapter->flags & I40EVF_FLAG_RX_PS_ENABLED) { + i40evf_alloc_rx_headers(ring); + i40evf_alloc_rx_buffers_ps(ring, ring->count); + } else { i40evf_alloc_rx_buffers_1buf(ring, ring->count); + } ring->next_to_use = ring->count - 1; writel(ring->next_to_use, ring->tail); } @@ -1032,7 +1035,7 @@ void i40evf_down(struct i40evf_adapter *adapter) struct net_device *netdev = adapter->netdev; struct i40evf_mac_filter *f; - if (adapter->state == __I40EVF_DOWN) + if (adapter->state <= __I40EVF_DOWN_PENDING) return; while (test_and_set_bit(__I40EVF_IN_CRITICAL_TASK, @@ -1122,7 +1125,9 @@ static void i40evf_free_queues(struct i40evf_adapter *adapter) if (!adapter->vsi_res) return; kfree(adapter->tx_rings); + adapter->tx_rings = NULL; kfree(adapter->rx_rings); + adapter->rx_rings = NULL; } /** @@ -1454,7 +1459,11 @@ static int i40evf_init_rss(struct i40evf_adapter *adapter) int ret; /* Enable PCTYPES for RSS, TCP/UDP with IPv4/IPv6 */ - hena = I40E_DEFAULT_RSS_HENA; + if (adapter->vf_res->vf_offload_flags & + I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2) + hena = I40E_DEFAULT_RSS_HENA_EXPANDED; + else + hena = I40E_DEFAULT_RSS_HENA; wr32(hw, I40E_VFQF_HENA(0), (u32)hena); wr32(hw, I40E_VFQF_HENA(1), (u32)(hena >> 32)); @@ -1829,6 +1838,7 @@ static void i40evf_reset_task(struct work_struct *work) break; msleep(I40EVF_RESET_WAIT_MS); } + pci_set_master(adapter->pdev); /* extra wait to make sure minimum wait is met */ msleep(I40EVF_RESET_WAIT_MS); if (i == I40EVF_RESET_WAIT_COUNT) { @@ -1873,6 +1883,7 @@ static void i40evf_reset_task(struct work_struct *work) adapter->netdev->flags &= ~IFF_UP; clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section); adapter->flags &= ~I40EVF_FLAG_RESET_PENDING; + adapter->state = __I40EVF_DOWN; dev_info(&adapter->pdev->dev, "Reset task did not complete, VF disabled\n"); return; /* Do not attempt to reinit. It's dead, Jim. */ } @@ -2142,7 +2153,8 @@ static int i40evf_open(struct net_device *netdev) dev_err(&adapter->pdev->dev, "Unable to open device due to PF driver failure.\n"); return -EIO; } - if (adapter->state != __I40EVF_DOWN || adapter->aq_required) + + if (adapter->state != __I40EVF_DOWN) return -EBUSY; /* allocate transmit descriptors */ @@ -2197,14 +2209,14 @@ static int i40evf_close(struct net_device *netdev) { struct i40evf_adapter *adapter = netdev_priv(netdev); - if (adapter->state <= __I40EVF_DOWN) + if (adapter->state <= __I40EVF_DOWN_PENDING) return 0; set_bit(__I40E_DOWN, &adapter->vsi.state); i40evf_down(adapter); - adapter->state = __I40EVF_DOWN; + adapter->state = __I40EVF_DOWN_PENDING; i40evf_free_traffic_irqs(adapter); return 0; @@ -2325,9 +2337,24 @@ int i40evf_process_config(struct i40evf_adapter *adapter) NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO6 | + NETIF_F_TSO_ECN | + NETIF_F_GSO_GRE | + NETIF_F_GSO_UDP_TUNNEL | NETIF_F_RXCSUM | NETIF_F_GRO; + netdev->hw_enc_features |= NETIF_F_IP_CSUM | + NETIF_F_IPV6_CSUM | + NETIF_F_TSO | + NETIF_F_TSO6 | + NETIF_F_TSO_ECN | + NETIF_F_GSO_GRE | + NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM; + + if (adapter->flags & I40EVF_FLAG_OUTER_UDP_CSUM_CAPABLE) + netdev->features |= NETIF_F_GSO_UDP_TUNNEL_CSUM; + /* copy netdev features into list of user selectable features */ netdev->hw_features |= netdev->features; netdev->hw_features &= ~NETIF_F_RXCSUM; @@ -2466,11 +2493,20 @@ static void i40evf_init_task(struct work_struct *work) default: goto err_alloc; } + + if (hw->mac.type == I40E_MAC_X722_VF) + adapter->flags |= I40EVF_FLAG_OUTER_UDP_CSUM_CAPABLE; + if (i40evf_process_config(adapter)) goto err_alloc; adapter->current_op = I40E_VIRTCHNL_OP_UNKNOWN; adapter->flags |= I40EVF_FLAG_RX_CSUM_ENABLED; + adapter->flags |= I40EVF_FLAG_RX_1BUF_CAPABLE; + adapter->flags |= I40EVF_FLAG_RX_PS_CAPABLE; + + /* Default to single buffer rx, can be changed through ethtool. */ + adapter->flags &= ~I40EVF_FLAG_RX_PS_ENABLED; netdev->netdev_ops = &i40evf_netdev_ops; i40evf_set_ethtool_ops(netdev); @@ -2502,10 +2538,9 @@ static void i40evf_init_task(struct work_struct *work) goto err_sw_init; i40evf_map_rings_to_vectors(adapter); if (adapter->vf_res->vf_offload_flags & - I40E_VIRTCHNL_VF_OFFLOAD_WB_ON_ITR) + I40E_VIRTCHNL_VF_OFFLOAD_WB_ON_ITR) adapter->flags |= I40EVF_FLAG_WB_ON_ITR_CAPABLE; - if (!RSS_AQ(adapter)) - i40evf_init_rss(adapter); + err = i40evf_request_misc_irq(adapter); if (err) goto err_sw_init; @@ -2885,6 +2920,11 @@ static int __init i40evf_init_module(void) pr_info("%s\n", i40evf_copyright); + i40evf_wq = create_singlethread_workqueue(i40evf_driver_name); + if (!i40evf_wq) { + pr_err("%s: Failed to create workqueue\n", i40evf_driver_name); + return -ENOMEM; + } ret = pci_register_driver(&i40evf_driver); return ret; } @@ -2900,6 +2940,7 @@ module_init(i40evf_init_module); static void __exit i40evf_exit_module(void) { pci_unregister_driver(&i40evf_driver); + destroy_workqueue(i40evf_wq); } module_exit(i40evf_exit_module); diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c index c1c526283..488e738f7 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c @@ -270,6 +270,10 @@ void i40evf_configure_queues(struct i40evf_adapter *adapter) vqpi->rxq.max_pkt_size = adapter->netdev->mtu + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN; vqpi->rxq.databuffer_size = adapter->rx_rings[i].rx_buf_len; + if (adapter->flags & I40EVF_FLAG_RX_PS_ENABLED) { + vqpi->rxq.splithdr_enabled = true; + vqpi->rxq.hdr_size = I40E_RX_HDR_SIZE; + } vqpi++; } @@ -804,6 +808,8 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter, case I40E_VIRTCHNL_OP_DISABLE_QUEUES: i40evf_free_all_tx_resources(adapter); i40evf_free_all_rx_resources(adapter); + if (adapter->state == __I40EVF_DOWN_PENDING) + adapter->state = __I40EVF_DOWN; break; case I40E_VIRTCHNL_OP_VERSION: case I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP: diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c index adb33e2a0..a23aa6704 100644 --- a/drivers/net/ethernet/intel/igb/e1000_82575.c +++ b/drivers/net/ethernet/intel/igb/e1000_82575.c @@ -34,6 +34,7 @@ #include "e1000_mac.h" #include "e1000_82575.h" #include "e1000_i210.h" +#include "igb.h" static s32 igb_get_invariants_82575(struct e1000_hw *); static s32 igb_acquire_phy_82575(struct e1000_hw *); @@ -71,6 +72,32 @@ static s32 igb_update_nvm_checksum_i350(struct e1000_hw *hw); static const u16 e1000_82580_rxpbs_table[] = { 36, 72, 144, 1, 2, 4, 8, 16, 35, 70, 140 }; +/* Due to a hw errata, if the host tries to configure the VFTA register + * while performing queries from the BMC or DMA, then the VFTA in some + * cases won't be written. + */ + +/** + * igb_write_vfta_i350 - Write value to VLAN filter table + * @hw: pointer to the HW structure + * @offset: register offset in VLAN filter table + * @value: register value written to VLAN filter table + * + * Writes value at the given offset in the register array which stores + * the VLAN filter table. + **/ +static void igb_write_vfta_i350(struct e1000_hw *hw, u32 offset, u32 value) +{ + struct igb_adapter *adapter = hw->back; + int i; + + for (i = 10; i--;) + array_wr32(E1000_VFTA, offset, value); + + wrfl(); + adapter->shadow_vfta[offset] = value; +} + /** * igb_sgmii_uses_mdio_82575 - Determine if I2C pins are for external MDIO * @hw: pointer to the HW structure @@ -398,6 +425,8 @@ static s32 igb_init_mac_params_82575(struct e1000_hw *hw) /* Set mta register count */ mac->mta_reg_count = 128; + /* Set uta register count */ + mac->uta_reg_count = (hw->mac.type == e1000_82575) ? 0 : 128; /* Set rar entry count */ switch (mac->type) { case e1000_82576: @@ -429,6 +458,11 @@ static s32 igb_init_mac_params_82575(struct e1000_hw *hw) mac->ops.release_swfw_sync = igb_release_swfw_sync_82575; } + if ((hw->mac.type == e1000_i350) || (hw->mac.type == e1000_i354)) + mac->ops.write_vfta = igb_write_vfta_i350; + else + mac->ops.write_vfta = igb_write_vfta; + /* Set if part includes ASF firmware */ mac->asf_firmware_present = true; /* Set if manageability features are enabled. */ @@ -1517,10 +1551,7 @@ static s32 igb_init_hw_82575(struct e1000_hw *hw) /* Disabling VLAN filtering */ hw_dbg("Initializing the IEEE VLAN\n"); - if ((hw->mac.type == e1000_i350) || (hw->mac.type == e1000_i354)) - igb_clear_vfta_i350(hw); - else - igb_clear_vfta(hw); + igb_clear_vfta(hw); /* Setup the receive address */ igb_init_rx_addrs(hw, rar_count); @@ -2889,7 +2920,7 @@ static struct e1000_mac_operations e1000_mac_ops_82575 = { #endif }; -static struct e1000_phy_operations e1000_phy_ops_82575 = { +static const struct e1000_phy_operations e1000_phy_ops_82575 = { .acquire = igb_acquire_phy_82575, .get_cfg_done = igb_get_cfg_done_82575, .release = igb_release_phy_82575, diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.h b/drivers/net/ethernet/intel/igb/e1000_82575.h index 2154aea7a..de8805a2a 100644 --- a/drivers/net/ethernet/intel/igb/e1000_82575.h +++ b/drivers/net/ethernet/intel/igb/e1000_82575.h @@ -56,10 +56,10 @@ s32 igb_write_i2c_byte(struct e1000_hw *hw, u8 byte_offset, u8 dev_addr, #define E1000_SRRCTL_TIMESTAMP 0x40000000 -#define E1000_MRQC_ENABLE_RSS_4Q 0x00000002 +#define E1000_MRQC_ENABLE_RSS_MQ 0x00000002 #define E1000_MRQC_ENABLE_VMDQ 0x00000003 #define E1000_MRQC_RSS_FIELD_IPV4_UDP 0x00400000 -#define E1000_MRQC_ENABLE_VMDQ_RSS_2Q 0x00000005 +#define E1000_MRQC_ENABLE_VMDQ_RSS_MQ 0x00000005 #define E1000_MRQC_RSS_FIELD_IPV6_UDP 0x00800000 #define E1000_MRQC_RSS_FIELD_IPV6_UDP_EX 0x01000000 diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h b/drivers/net/ethernet/intel/igb/e1000_defines.h index c3c598c34..e9f23ee8f 100644 --- a/drivers/net/ethernet/intel/igb/e1000_defines.h +++ b/drivers/net/ethernet/intel/igb/e1000_defines.h @@ -356,7 +356,8 @@ /* Ethertype field values */ #define ETHERNET_IEEE_VLAN_TYPE 0x8100 /* 802.3ac packet */ -#define MAX_JUMBO_FRAME_SIZE 0x3F00 +/* As per the EAS the maximum supported size is 9.5KB (9728 bytes) */ +#define MAX_JUMBO_FRAME_SIZE 0x2600 /* PBA constants */ #define E1000_PBA_34K 0x0022 diff --git a/drivers/net/ethernet/intel/igb/e1000_hw.h b/drivers/net/ethernet/intel/igb/e1000_hw.h index 4034207eb..2fb2213cd 100644 --- a/drivers/net/ethernet/intel/igb/e1000_hw.h +++ b/drivers/net/ethernet/intel/igb/e1000_hw.h @@ -325,7 +325,7 @@ struct e1000_mac_operations { s32 (*get_thermal_sensor_data)(struct e1000_hw *); s32 (*init_thermal_sensor_thresh)(struct e1000_hw *); #endif - + void (*write_vfta)(struct e1000_hw *, u32, u32); }; struct e1000_phy_operations { @@ -372,7 +372,7 @@ struct e1000_thermal_sensor_data { struct e1000_info { s32 (*get_invariants)(struct e1000_hw *); struct e1000_mac_operations *mac_ops; - struct e1000_phy_operations *phy_ops; + const struct e1000_phy_operations *phy_ops; struct e1000_nvm_operations *nvm_ops; }; diff --git a/drivers/net/ethernet/intel/igb/e1000_mac.c b/drivers/net/ethernet/intel/igb/e1000_mac.c index 2a88595f9..07cf4fe58 100644 --- a/drivers/net/ethernet/intel/igb/e1000_mac.c +++ b/drivers/net/ethernet/intel/igb/e1000_mac.c @@ -92,10 +92,8 @@ void igb_clear_vfta(struct e1000_hw *hw) { u32 offset; - for (offset = 0; offset < E1000_VLAN_FILTER_TBL_SIZE; offset++) { - array_wr32(E1000_VFTA, offset, 0); - wrfl(); - } + for (offset = E1000_VLAN_FILTER_TBL_SIZE; offset--;) + hw->mac.ops.write_vfta(hw, offset, 0); } /** @@ -107,54 +105,14 @@ void igb_clear_vfta(struct e1000_hw *hw) * Writes value at the given offset in the register array which stores * the VLAN filter table. **/ -static void igb_write_vfta(struct e1000_hw *hw, u32 offset, u32 value) +void igb_write_vfta(struct e1000_hw *hw, u32 offset, u32 value) { + struct igb_adapter *adapter = hw->back; + array_wr32(E1000_VFTA, offset, value); wrfl(); -} - -/* Due to a hw errata, if the host tries to configure the VFTA register - * while performing queries from the BMC or DMA, then the VFTA in some - * cases won't be written. - */ -/** - * igb_clear_vfta_i350 - Clear VLAN filter table - * @hw: pointer to the HW structure - * - * Clears the register array which contains the VLAN filter table by - * setting all the values to 0. - **/ -void igb_clear_vfta_i350(struct e1000_hw *hw) -{ - u32 offset; - int i; - - for (offset = 0; offset < E1000_VLAN_FILTER_TBL_SIZE; offset++) { - for (i = 0; i < 10; i++) - array_wr32(E1000_VFTA, offset, 0); - - wrfl(); - } -} - -/** - * igb_write_vfta_i350 - Write value to VLAN filter table - * @hw: pointer to the HW structure - * @offset: register offset in VLAN filter table - * @value: register value written to VLAN filter table - * - * Writes value at the given offset in the register array which stores - * the VLAN filter table. - **/ -static void igb_write_vfta_i350(struct e1000_hw *hw, u32 offset, u32 value) -{ - int i; - - for (i = 0; i < 10; i++) - array_wr32(E1000_VFTA, offset, value); - - wrfl(); + adapter->shadow_vfta[offset] = value; } /** @@ -182,41 +140,156 @@ void igb_init_rx_addrs(struct e1000_hw *hw, u16 rar_count) hw->mac.ops.rar_set(hw, mac_addr, i); } +/** + * igb_find_vlvf_slot - find the VLAN id or the first empty slot + * @hw: pointer to hardware structure + * @vlan: VLAN id to write to VLAN filter + * @vlvf_bypass: skip VLVF if no match is found + * + * return the VLVF index where this VLAN id should be placed + * + **/ +static s32 igb_find_vlvf_slot(struct e1000_hw *hw, u32 vlan, bool vlvf_bypass) +{ + s32 regindex, first_empty_slot; + u32 bits; + + /* short cut the special case */ + if (vlan == 0) + return 0; + + /* if vlvf_bypass is set we don't want to use an empty slot, we + * will simply bypass the VLVF if there are no entries present in the + * VLVF that contain our VLAN + */ + first_empty_slot = vlvf_bypass ? -E1000_ERR_NO_SPACE : 0; + + /* Search for the VLAN id in the VLVF entries. Save off the first empty + * slot found along the way. + * + * pre-decrement loop covering (IXGBE_VLVF_ENTRIES - 1) .. 1 + */ + for (regindex = E1000_VLVF_ARRAY_SIZE; --regindex > 0;) { + bits = rd32(E1000_VLVF(regindex)) & E1000_VLVF_VLANID_MASK; + if (bits == vlan) + return regindex; + if (!first_empty_slot && !bits) + first_empty_slot = regindex; + } + + return first_empty_slot ? : -E1000_ERR_NO_SPACE; +} + /** * igb_vfta_set - enable or disable vlan in VLAN filter table * @hw: pointer to the HW structure - * @vid: VLAN id to add or remove - * @add: if true add filter, if false remove + * @vlan: VLAN id to add or remove + * @vind: VMDq output index that maps queue to VLAN id + * @vlan_on: if true add filter, if false remove * * Sets or clears a bit in the VLAN filter table array based on VLAN id * and if we are adding or removing the filter **/ -s32 igb_vfta_set(struct e1000_hw *hw, u32 vid, bool add) +s32 igb_vfta_set(struct e1000_hw *hw, u32 vlan, u32 vind, + bool vlan_on, bool vlvf_bypass) { - u32 index = (vid >> E1000_VFTA_ENTRY_SHIFT) & E1000_VFTA_ENTRY_MASK; - u32 mask = 1 << (vid & E1000_VFTA_ENTRY_BIT_SHIFT_MASK); - u32 vfta; struct igb_adapter *adapter = hw->back; - s32 ret_val = 0; + u32 regidx, vfta_delta, vfta, bits; + s32 vlvf_index; - vfta = adapter->shadow_vfta[index]; + if ((vlan > 4095) || (vind > 7)) + return -E1000_ERR_PARAM; - /* bit was set/cleared before we started */ - if ((!!(vfta & mask)) == add) { - ret_val = -E1000_ERR_CONFIG; - } else { - if (add) - vfta |= mask; - else - vfta &= ~mask; + /* this is a 2 part operation - first the VFTA, then the + * VLVF and VLVFB if VT Mode is set + * We don't write the VFTA until we know the VLVF part succeeded. + */ + + /* Part 1 + * The VFTA is a bitstring made up of 128 32-bit registers + * that enable the particular VLAN id, much like the MTA: + * bits[11-5]: which register + * bits[4-0]: which bit in the register + */ + regidx = vlan / 32; + vfta_delta = 1 << (vlan % 32); + vfta = adapter->shadow_vfta[regidx]; + + /* vfta_delta represents the difference between the current value + * of vfta and the value we want in the register. Since the diff + * is an XOR mask we can just update vfta using an XOR. + */ + vfta_delta &= vlan_on ? ~vfta : vfta; + vfta ^= vfta_delta; + + /* Part 2 + * If VT Mode is set + * Either vlan_on + * make sure the VLAN is in VLVF + * set the vind bit in the matching VLVFB + * Or !vlan_on + * clear the pool bit and possibly the vind + */ + if (!adapter->vfs_allocated_count) + goto vfta_update; + + vlvf_index = igb_find_vlvf_slot(hw, vlan, vlvf_bypass); + if (vlvf_index < 0) { + if (vlvf_bypass) + goto vfta_update; + return vlvf_index; } - if ((hw->mac.type == e1000_i350) || (hw->mac.type == e1000_i354)) - igb_write_vfta_i350(hw, index, vfta); - else - igb_write_vfta(hw, index, vfta); - adapter->shadow_vfta[index] = vfta; - return ret_val; + bits = rd32(E1000_VLVF(vlvf_index)); + + /* set the pool bit */ + bits |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vind); + if (vlan_on) + goto vlvf_update; + + /* clear the pool bit */ + bits ^= 1 << (E1000_VLVF_POOLSEL_SHIFT + vind); + + if (!(bits & E1000_VLVF_POOLSEL_MASK)) { + /* Clear VFTA first, then disable VLVF. Otherwise + * we run the risk of stray packets leaking into + * the PF via the default pool + */ + if (vfta_delta) + hw->mac.ops.write_vfta(hw, regidx, vfta); + + /* disable VLVF and clear remaining bit from pool */ + wr32(E1000_VLVF(vlvf_index), 0); + + return 0; + } + + /* If there are still bits set in the VLVFB registers + * for the VLAN ID indicated we need to see if the + * caller is requesting that we clear the VFTA entry bit. + * If the caller has requested that we clear the VFTA + * entry bit but there are still pools/VFs using this VLAN + * ID entry then ignore the request. We're not worried + * about the case where we're turning the VFTA VLAN ID + * entry bit on, only when requested to turn it off as + * there may be multiple pools and/or VFs using the + * VLAN ID entry. In that case we cannot clear the + * VFTA bit until all pools/VFs using that VLAN ID have also + * been cleared. This will be indicated by "bits" being + * zero. + */ + vfta_delta = 0; + +vlvf_update: + /* record pool change and enable VLAN ID if not already enabled */ + wr32(E1000_VLVF(vlvf_index), bits | vlan | E1000_VLVF_VLANID_ENABLE); + +vfta_update: + /* bit was set/cleared before we started */ + if (vfta_delta) + hw->mac.ops.write_vfta(hw, regidx, vfta); + + return 0; } /** diff --git a/drivers/net/ethernet/intel/igb/e1000_mac.h b/drivers/net/ethernet/intel/igb/e1000_mac.h index ea24961b0..90c8893c3 100644 --- a/drivers/net/ethernet/intel/igb/e1000_mac.h +++ b/drivers/net/ethernet/intel/igb/e1000_mac.h @@ -56,8 +56,9 @@ s32 igb_write_8bit_ctrl_reg(struct e1000_hw *hw, u32 reg, void igb_clear_hw_cntrs_base(struct e1000_hw *hw); void igb_clear_vfta(struct e1000_hw *hw); -void igb_clear_vfta_i350(struct e1000_hw *hw); -s32 igb_vfta_set(struct e1000_hw *hw, u32 vid, bool add); +void igb_write_vfta(struct e1000_hw *hw, u32 offset, u32 value); +s32 igb_vfta_set(struct e1000_hw *hw, u32 vid, u32 vind, + bool vlan_on, bool vlvf_bypass); void igb_config_collision_dist(struct e1000_hw *hw); void igb_init_rx_addrs(struct e1000_hw *hw, u16 rar_count); void igb_mta_set(struct e1000_hw *hw, u32 hash_value); diff --git a/drivers/net/ethernet/intel/igb/e1000_mbx.c b/drivers/net/ethernet/intel/igb/e1000_mbx.c index 162cc4934..10f5c9e01 100644 --- a/drivers/net/ethernet/intel/igb/e1000_mbx.c +++ b/drivers/net/ethernet/intel/igb/e1000_mbx.c @@ -322,14 +322,20 @@ static s32 igb_obtain_mbx_lock_pf(struct e1000_hw *hw, u16 vf_number) { s32 ret_val = -E1000_ERR_MBX; u32 p2v_mailbox; + int count = 10; - /* Take ownership of the buffer */ - wr32(E1000_P2VMAILBOX(vf_number), E1000_P2VMAILBOX_PFU); + do { + /* Take ownership of the buffer */ + wr32(E1000_P2VMAILBOX(vf_number), E1000_P2VMAILBOX_PFU); - /* reserve mailbox for vf use */ - p2v_mailbox = rd32(E1000_P2VMAILBOX(vf_number)); - if (p2v_mailbox & E1000_P2VMAILBOX_PFU) - ret_val = 0; + /* reserve mailbox for vf use */ + p2v_mailbox = rd32(E1000_P2VMAILBOX(vf_number)); + if (p2v_mailbox & E1000_P2VMAILBOX_PFU) { + ret_val = 0; + break; + } + udelay(1000); + } while (count-- > 0); return ret_val; } diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index e3cb93bdb..9413fa613 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -95,7 +95,6 @@ struct vf_data_storage { unsigned char vf_mac_addresses[ETH_ALEN]; u16 vf_mc_hashes[IGB_MAX_VF_MC_ENTRIES]; u16 num_vf_mc_hashes; - u16 vlans_enabled; u32 flags; unsigned long last_nack; u16 pf_vlan; /* When set, guest VLAN config not allowed. */ @@ -482,6 +481,7 @@ struct igb_adapter { #define IGB_FLAG_MAS_ENABLE (1 << 12) #define IGB_FLAG_HAS_MSIX (1 << 13) #define IGB_FLAG_EEE (1 << 14) +#define IGB_FLAG_VLAN_PROMISC BIT(15) /* Media Auto Sense */ #define IGB_MAS_ENABLE_0 0X0001 @@ -510,6 +510,8 @@ enum igb_boards { extern char igb_driver_name[]; extern char igb_driver_version[]; +int igb_open(struct net_device *netdev); +int igb_close(struct net_device *netdev); int igb_up(struct igb_adapter *); void igb_down(struct igb_adapter *); void igb_reinit_locked(struct igb_adapter *); diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 1d329f1d0..7982243d1 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -2017,7 +2017,7 @@ static void igb_diag_test(struct net_device *netdev, if (if_running) /* indicate we're in test mode */ - dev_close(netdev); + igb_close(netdev); else igb_reset(adapter); @@ -2050,7 +2050,7 @@ static void igb_diag_test(struct net_device *netdev, clear_bit(__IGB_TESTING, &adapter->state); if (if_running) - dev_open(netdev); + igb_open(netdev); } else { dev_info(&adapter->pdev->dev, "online testing starting\n"); diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 31e5f3942..55a1405cb 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -122,8 +122,8 @@ static void igb_setup_mrqc(struct igb_adapter *); static int igb_probe(struct pci_dev *, const struct pci_device_id *); static void igb_remove(struct pci_dev *pdev); static int igb_sw_init(struct igb_adapter *); -static int igb_open(struct net_device *); -static int igb_close(struct net_device *); +int igb_open(struct net_device *); +int igb_close(struct net_device *); static void igb_configure(struct igb_adapter *); static void igb_configure_tx(struct igb_adapter *); static void igb_configure_rx(struct igb_adapter *); @@ -140,7 +140,7 @@ static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats); static int igb_change_mtu(struct net_device *, int); static int igb_set_mac(struct net_device *, void *); -static void igb_set_uta(struct igb_adapter *adapter); +static void igb_set_uta(struct igb_adapter *adapter, bool set); static irqreturn_t igb_intr(int irq, void *); static irqreturn_t igb_intr_msi(int irq, void *); static irqreturn_t igb_msix_other(int irq, void *); @@ -1534,12 +1534,13 @@ static void igb_irq_enable(struct igb_adapter *adapter) static void igb_update_mng_vlan(struct igb_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; + u16 pf_id = adapter->vfs_allocated_count; u16 vid = adapter->hw.mng_cookie.vlan_id; u16 old_vid = adapter->mng_vlan_id; if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) { /* add VID to filter table */ - igb_vfta_set(hw, vid, true); + igb_vfta_set(hw, vid, pf_id, true, true); adapter->mng_vlan_id = vid; } else { adapter->mng_vlan_id = IGB_MNG_VLAN_NONE; @@ -1549,7 +1550,7 @@ static void igb_update_mng_vlan(struct igb_adapter *adapter) (vid != old_vid) && !test_bit(old_vid, adapter->active_vlans)) { /* remove VID from filter table */ - igb_vfta_set(hw, old_vid, false); + igb_vfta_set(hw, vid, pf_id, false, true); } } @@ -1818,6 +1819,10 @@ void igb_down(struct igb_adapter *adapter) if (!pci_channel_offline(adapter->pdev)) igb_reset(adapter); + + /* clear VLAN promisc flag so VFTA will be updated if necessary */ + adapter->flags &= ~IGB_FLAG_VLAN_PROMISC; + igb_clean_all_tx_rings(adapter); igb_clean_all_rx_rings(adapter); #ifdef CONFIG_IGB_DCA @@ -1862,7 +1867,7 @@ void igb_reset(struct igb_adapter *adapter) struct e1000_hw *hw = &adapter->hw; struct e1000_mac_info *mac = &hw->mac; struct e1000_fc_info *fc = &hw->fc; - u32 pba = 0, tx_space, min_tx_space, min_rx_space, hwm; + u32 pba, hwm; /* Repartition Pba for greater than 9k mtu * To take effect CTRL.RST is required. @@ -1886,9 +1891,10 @@ void igb_reset(struct igb_adapter *adapter) break; } - if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) && - (mac->type < e1000_82576)) { - /* adjust PBA for jumbo frames */ + if (mac->type == e1000_82575) { + u32 min_rx_space, min_tx_space, needed_tx_space; + + /* write Rx PBA so that hardware can report correct Tx PBA */ wr32(E1000_PBA, pba); /* To maintain wire speed transmits, the Tx FIFO should be @@ -1898,31 +1904,26 @@ void igb_reset(struct igb_adapter *adapter) * one full receive packet and is similarly rounded up and * expressed in KB. */ - pba = rd32(E1000_PBA); - /* upper 16 bits has Tx packet buffer allocation size in KB */ - tx_space = pba >> 16; - /* lower 16 bits has Rx packet buffer allocation size in KB */ - pba &= 0xffff; - /* the Tx fifo also stores 16 bytes of information about the Tx - * but don't include ethernet FCS because hardware appends it + min_rx_space = DIV_ROUND_UP(MAX_JUMBO_FRAME_SIZE, 1024); + + /* The Tx FIFO also stores 16 bytes of information about the Tx + * but don't include Ethernet FCS because hardware appends it. + * We only need to round down to the nearest 512 byte block + * count since the value we care about is 2 frames, not 1. */ - min_tx_space = (adapter->max_frame_size + - sizeof(union e1000_adv_tx_desc) - - ETH_FCS_LEN) * 2; - min_tx_space = ALIGN(min_tx_space, 1024); - min_tx_space >>= 10; - /* software strips receive CRC, so leave room for it */ - min_rx_space = adapter->max_frame_size; - min_rx_space = ALIGN(min_rx_space, 1024); - min_rx_space >>= 10; + min_tx_space = adapter->max_frame_size; + min_tx_space += sizeof(union e1000_adv_tx_desc) - ETH_FCS_LEN; + min_tx_space = DIV_ROUND_UP(min_tx_space, 512); + + /* upper 16 bits has Tx packet buffer allocation size in KB */ + needed_tx_space = min_tx_space - (rd32(E1000_PBA) >> 16); /* If current Tx allocation is less than the min Tx FIFO size, * and the min Tx FIFO size is less than the current Rx FIFO - * allocation, take space away from current Rx allocation + * allocation, take space away from current Rx allocation. */ - if (tx_space < min_tx_space && - ((min_tx_space - tx_space) < pba)) { - pba = pba - (min_tx_space - tx_space); + if (needed_tx_space < pba) { + pba -= needed_tx_space; /* if short on Rx space, Rx wins and must trump Tx * adjustment @@ -1930,18 +1931,20 @@ void igb_reset(struct igb_adapter *adapter) if (pba < min_rx_space) pba = min_rx_space; } + + /* adjust PBA for jumbo frames */ wr32(E1000_PBA, pba); } - /* flow control settings */ - /* The high water mark must be low enough to fit one full frame - * (or the size used for early receive) above it in the Rx FIFO. - * Set it to the lower of: - * - 90% of the Rx FIFO size, or - * - the full Rx FIFO size minus one full frame + /* flow control settings + * The high water mark must be low enough to fit one full frame + * after transmitting the pause frame. As such we must have enough + * space to allow for us to complete our current transmit and then + * receive the frame that is in progress from the link partner. + * Set it to: + * - the full Rx FIFO size minus one full Tx plus one full Rx frame */ - hwm = min(((pba << 10) * 9 / 10), - ((pba << 10) - 2 * adapter->max_frame_size)); + hwm = (pba << 10) - (adapter->max_frame_size + MAX_JUMBO_FRAME_SIZE); fc->high_water = hwm & 0xFFFFFFF0; /* 16-byte granularity */ fc->low_water = fc->high_water - 16; @@ -2051,7 +2054,7 @@ static int igb_set_features(struct net_device *netdev, if (changed & NETIF_F_HW_VLAN_CTAG_RX) igb_vlan_mode(netdev, features); - if (!(changed & NETIF_F_RXALL)) + if (!(changed & (NETIF_F_RXALL | NETIF_F_NTUPLE))) return 0; netdev->features = features; @@ -2064,6 +2067,25 @@ static int igb_set_features(struct net_device *netdev, return 0; } +static int igb_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr, u16 vid, + u16 flags) +{ + /* guarantee we can provide a unique filter for the unicast address */ + if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) { + struct igb_adapter *adapter = netdev_priv(dev); + struct e1000_hw *hw = &adapter->hw; + int vfn = adapter->vfs_allocated_count; + int rar_entries = hw->mac.rar_entry_count - (vfn + 1); + + if (netdev_uc_count(dev) >= rar_entries) + return -ENOMEM; + } + + return ndo_dflt_fdb_add(ndm, tb, dev, addr, vid, flags); +} + static const struct net_device_ops igb_netdev_ops = { .ndo_open = igb_open, .ndo_stop = igb_close, @@ -2087,6 +2109,7 @@ static const struct net_device_ops igb_netdev_ops = { #endif .ndo_fix_features = igb_fix_features, .ndo_set_features = igb_set_features, + .ndo_fdb_add = igb_ndo_fdb_add, .ndo_features_check = passthru_features_check, }; @@ -2349,27 +2372,35 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) * assignment. */ netdev->features |= NETIF_F_SG | - NETIF_F_IP_CSUM | - NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_RXHASH | NETIF_F_RXCSUM | + NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_TX; + if (hw->mac.type >= e1000_82576) + netdev->features |= NETIF_F_SCTP_CRC; + /* copy netdev features into list of user selectable features */ netdev->hw_features |= netdev->features; netdev->hw_features |= NETIF_F_RXALL; + if (hw->mac.type >= e1000_i350) + netdev->hw_features |= NETIF_F_NTUPLE; + /* set this bit last since it cannot be part of hw_features */ netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; - netdev->vlan_features |= NETIF_F_TSO | + netdev->vlan_features |= NETIF_F_SG | + NETIF_F_TSO | NETIF_F_TSO6 | - NETIF_F_IP_CSUM | - NETIF_F_IPV6_CSUM | - NETIF_F_SG; + NETIF_F_HW_CSUM | + NETIF_F_SCTP_CRC; + + netdev->mpls_features |= NETIF_F_HW_CSUM; + netdev->hw_enc_features |= NETIF_F_HW_CSUM; netdev->priv_flags |= IFF_SUPP_NOFCS; @@ -2378,11 +2409,6 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->vlan_features |= NETIF_F_HIGHDMA; } - if (hw->mac.type >= e1000_82576) { - netdev->hw_features |= NETIF_F_SCTP_CRC; - netdev->features |= NETIF_F_SCTP_CRC; - } - netdev->priv_flags |= IFF_UNICAST_FLT; adapter->en_mng_pt = igb_enable_mng_pass_thru(hw); @@ -2515,6 +2541,26 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) adapter->wol = 0; } + /* Some vendors want the ability to Use the EEPROM setting as + * enable/disable only, and not for capability + */ + if (((hw->mac.type == e1000_i350) || + (hw->mac.type == e1000_i354)) && + (pdev->subsystem_vendor == PCI_VENDOR_ID_DELL)) { + adapter->flags |= IGB_FLAG_WOL_SUPPORTED; + adapter->wol = 0; + } + if (hw->mac.type == e1000_i350) { + if (((pdev->subsystem_device == 0x5001) || + (pdev->subsystem_device == 0x5002)) && + (hw->bus.func == 0)) { + adapter->flags |= IGB_FLAG_WOL_SUPPORTED; + adapter->wol = 0; + } + if (pdev->subsystem_device == 0x1F52) + adapter->flags |= IGB_FLAG_WOL_SUPPORTED; + } + device_set_wakeup_enable(&adapter->pdev->dev, adapter->flags & IGB_FLAG_WOL_SUPPORTED); @@ -2921,14 +2967,6 @@ void igb_set_flag_queue_pairs(struct igb_adapter *adapter, /* Device supports enough interrupts without queue pairing. */ break; case e1000_82576: - /* If VFs are going to be allocated with RSS queues then we - * should pair the queues in order to conserve interrupts due - * to limited supply. - */ - if ((adapter->rss_queues > 1) && - (adapter->vfs_allocated_count > 6)) - adapter->flags |= IGB_FLAG_QUEUE_PAIRS; - /* fall through */ case e1000_82580: case e1000_i350: case e1000_i354: @@ -2939,6 +2977,8 @@ void igb_set_flag_queue_pairs(struct igb_adapter *adapter, */ if (adapter->rss_queues > (max_rss_queues / 2)) adapter->flags |= IGB_FLAG_QUEUE_PAIRS; + else + adapter->flags &= ~IGB_FLAG_QUEUE_PAIRS; break; } } @@ -3132,7 +3172,7 @@ err_setup_tx: return err; } -static int igb_open(struct net_device *netdev) +int igb_open(struct net_device *netdev) { return __igb_open(netdev, false); } @@ -3169,7 +3209,7 @@ static int __igb_close(struct net_device *netdev, bool suspending) return 0; } -static int igb_close(struct net_device *netdev) +int igb_close(struct net_device *netdev) { return __igb_close(netdev, false); } @@ -3460,12 +3500,12 @@ static void igb_setup_mrqc(struct igb_adapter *adapter) wr32(E1000_VT_CTL, vtctl); } if (adapter->rss_queues > 1) - mrqc |= E1000_MRQC_ENABLE_VMDQ_RSS_2Q; + mrqc |= E1000_MRQC_ENABLE_VMDQ_RSS_MQ; else mrqc |= E1000_MRQC_ENABLE_VMDQ; } else { if (hw->mac.type != e1000_i211) - mrqc |= E1000_MRQC_ENABLE_RSS_4Q; + mrqc |= E1000_MRQC_ENABLE_RSS_MQ; } igb_vmm_control(adapter); @@ -3498,7 +3538,7 @@ void igb_setup_rctl(struct igb_adapter *adapter) /* disable store bad packets and clear size bits. */ rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256); - /* enable LPE to prevent packets larger than max_frame_size */ + /* enable LPE to allow for reception of jumbo frames */ rctl |= E1000_RCTL_LPE; /* disable queue 0 to prevent tail write w/o re-config */ @@ -3522,8 +3562,7 @@ void igb_setup_rctl(struct igb_adapter *adapter) E1000_RCTL_BAM | /* RX All Bcast Pkts */ E1000_RCTL_PMCF); /* RX All MAC Ctrl Pkts */ - rctl &= ~(E1000_RCTL_VFE | /* Disable VLAN filter */ - E1000_RCTL_DPF | /* Allow filtered pause */ + rctl &= ~(E1000_RCTL_DPF | /* Allow filtered pause */ E1000_RCTL_CFIEN); /* Dis VLAN CFIEN Filter */ /* Do not mess with E1000_CTRL_VME, it affects transmit as well, * and that breaks VLANs. @@ -3539,12 +3578,8 @@ static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size, struct e1000_hw *hw = &adapter->hw; u32 vmolr; - /* if it isn't the PF check to see if VFs are enabled and - * increase the size to support vlan tags - */ - if (vfn < adapter->vfs_allocated_count && - adapter->vf_data[vfn].vlans_enabled) - size += VLAN_TAG_SIZE; + if (size > MAX_JUMBO_FRAME_SIZE) + size = MAX_JUMBO_FRAME_SIZE; vmolr = rd32(E1000_VMOLR(vfn)); vmolr &= ~E1000_VMOLR_RLPML_MASK; @@ -3554,30 +3589,26 @@ static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size, return 0; } -/** - * igb_rlpml_set - set maximum receive packet size - * @adapter: board private structure - * - * Configure maximum receivable packet size. - **/ -static void igb_rlpml_set(struct igb_adapter *adapter) +static inline void igb_set_vf_vlan_strip(struct igb_adapter *adapter, + int vfn, bool enable) { - u32 max_frame_size = adapter->max_frame_size; struct e1000_hw *hw = &adapter->hw; - u16 pf_id = adapter->vfs_allocated_count; + u32 val, reg; - if (pf_id) { - igb_set_vf_rlpml(adapter, max_frame_size, pf_id); - /* If we're in VMDQ or SR-IOV mode, then set global RLPML - * to our max jumbo frame size, in case we need to enable - * jumbo frames on one of the rings later. - * This will not pass over-length frames into the default - * queue because it's gated by the VMOLR.RLPML. - */ - max_frame_size = MAX_JUMBO_FRAME_SIZE; - } + if (hw->mac.type < e1000_82576) + return; - wr32(E1000_RLPML, max_frame_size); + if (hw->mac.type == e1000_i350) + reg = E1000_DVMOLR(vfn); + else + reg = E1000_VMOLR(vfn); + + val = rd32(reg); + if (enable) + val |= E1000_VMOLR_STRVLAN; + else + val &= ~(E1000_VMOLR_STRVLAN); + wr32(reg, val); } static inline void igb_set_vmolr(struct igb_adapter *adapter, @@ -3593,14 +3624,6 @@ static inline void igb_set_vmolr(struct igb_adapter *adapter, return; vmolr = rd32(E1000_VMOLR(vfn)); - vmolr |= E1000_VMOLR_STRVLAN; /* Strip vlan tags */ - if (hw->mac.type == e1000_i350) { - u32 dvmolr; - - dvmolr = rd32(E1000_DVMOLR(vfn)); - dvmolr |= E1000_DVMOLR_STRVLAN; - wr32(E1000_DVMOLR(vfn), dvmolr); - } if (aupe) vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */ else @@ -3684,9 +3707,6 @@ static void igb_configure_rx(struct igb_adapter *adapter) { int i; - /* set UTA to appropriate mode */ - igb_set_uta(adapter); - /* set the correct pool for the PF default MAC address in entry 0 */ igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0, adapter->vfs_allocated_count); @@ -4004,6 +4024,130 @@ static int igb_write_uc_addr_list(struct net_device *netdev) return count; } +static int igb_vlan_promisc_enable(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u32 i, pf_id; + + switch (hw->mac.type) { + case e1000_i210: + case e1000_i211: + case e1000_i350: + /* VLAN filtering needed for VLAN prio filter */ + if (adapter->netdev->features & NETIF_F_NTUPLE) + break; + /* fall through */ + case e1000_82576: + case e1000_82580: + case e1000_i354: + /* VLAN filtering needed for pool filtering */ + if (adapter->vfs_allocated_count) + break; + /* fall through */ + default: + return 1; + } + + /* We are already in VLAN promisc, nothing to do */ + if (adapter->flags & IGB_FLAG_VLAN_PROMISC) + return 0; + + if (!adapter->vfs_allocated_count) + goto set_vfta; + + /* Add PF to all active pools */ + pf_id = adapter->vfs_allocated_count + E1000_VLVF_POOLSEL_SHIFT; + + for (i = E1000_VLVF_ARRAY_SIZE; --i;) { + u32 vlvf = rd32(E1000_VLVF(i)); + + vlvf |= 1 << pf_id; + wr32(E1000_VLVF(i), vlvf); + } + +set_vfta: + /* Set all bits in the VLAN filter table array */ + for (i = E1000_VLAN_FILTER_TBL_SIZE; i--;) + hw->mac.ops.write_vfta(hw, i, ~0U); + + /* Set flag so we don't redo unnecessary work */ + adapter->flags |= IGB_FLAG_VLAN_PROMISC; + + return 0; +} + +#define VFTA_BLOCK_SIZE 8 +static void igb_scrub_vfta(struct igb_adapter *adapter, u32 vfta_offset) +{ + struct e1000_hw *hw = &adapter->hw; + u32 vfta[VFTA_BLOCK_SIZE] = { 0 }; + u32 vid_start = vfta_offset * 32; + u32 vid_end = vid_start + (VFTA_BLOCK_SIZE * 32); + u32 i, vid, word, bits, pf_id; + + /* guarantee that we don't scrub out management VLAN */ + vid = adapter->mng_vlan_id; + if (vid >= vid_start && vid < vid_end) + vfta[(vid - vid_start) / 32] |= 1 << (vid % 32); + + if (!adapter->vfs_allocated_count) + goto set_vfta; + + pf_id = adapter->vfs_allocated_count + E1000_VLVF_POOLSEL_SHIFT; + + for (i = E1000_VLVF_ARRAY_SIZE; --i;) { + u32 vlvf = rd32(E1000_VLVF(i)); + + /* pull VLAN ID from VLVF */ + vid = vlvf & VLAN_VID_MASK; + + /* only concern ourselves with a certain range */ + if (vid < vid_start || vid >= vid_end) + continue; + + if (vlvf & E1000_VLVF_VLANID_ENABLE) { + /* record VLAN ID in VFTA */ + vfta[(vid - vid_start) / 32] |= 1 << (vid % 32); + + /* if PF is part of this then continue */ + if (test_bit(vid, adapter->active_vlans)) + continue; + } + + /* remove PF from the pool */ + bits = ~(1 << pf_id); + bits &= rd32(E1000_VLVF(i)); + wr32(E1000_VLVF(i), bits); + } + +set_vfta: + /* extract values from active_vlans and write back to VFTA */ + for (i = VFTA_BLOCK_SIZE; i--;) { + vid = (vfta_offset + i) * 32; + word = vid / BITS_PER_LONG; + bits = vid % BITS_PER_LONG; + + vfta[i] |= adapter->active_vlans[word] >> bits; + + hw->mac.ops.write_vfta(hw, vfta_offset + i, vfta[i]); + } +} + +static void igb_vlan_promisc_disable(struct igb_adapter *adapter) +{ + u32 i; + + /* We are not in VLAN promisc, nothing to do */ + if (!(adapter->flags & IGB_FLAG_VLAN_PROMISC)) + return; + + /* Set flag so we don't redo unnecessary work */ + adapter->flags &= ~IGB_FLAG_VLAN_PROMISC; + + for (i = 0; i < E1000_VLAN_FILTER_TBL_SIZE; i += VFTA_BLOCK_SIZE) + igb_scrub_vfta(adapter, i); +} + /** * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set * @netdev: network interface device structure @@ -4018,21 +4162,17 @@ static void igb_set_rx_mode(struct net_device *netdev) struct igb_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; unsigned int vfn = adapter->vfs_allocated_count; - u32 rctl, vmolr = 0; + u32 rctl = 0, vmolr = 0; int count; /* Check for Promiscuous and All Multicast modes */ - rctl = rd32(E1000_RCTL); - - /* clear the effected bits */ - rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE); - if (netdev->flags & IFF_PROMISC) { - /* retain VLAN HW filtering if in VT mode */ - if (adapter->vfs_allocated_count) - rctl |= E1000_RCTL_VFE; - rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE); - vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME); + rctl |= E1000_RCTL_UPE | E1000_RCTL_MPE; + vmolr |= E1000_VMOLR_MPME; + + /* enable use of UTA filter to force packets to default pool */ + if (hw->mac.type == e1000_82576) + vmolr |= E1000_VMOLR_ROPE; } else { if (netdev->flags & IFF_ALLMULTI) { rctl |= E1000_RCTL_MPE; @@ -4050,17 +4190,34 @@ static void igb_set_rx_mode(struct net_device *netdev) vmolr |= E1000_VMOLR_ROMPE; } } - /* Write addresses to available RAR registers, if there is not - * sufficient space to store all the addresses then enable - * unicast promiscuous mode - */ - count = igb_write_uc_addr_list(netdev); - if (count < 0) { - rctl |= E1000_RCTL_UPE; - vmolr |= E1000_VMOLR_ROPE; - } - rctl |= E1000_RCTL_VFE; } + + /* Write addresses to available RAR registers, if there is not + * sufficient space to store all the addresses then enable + * unicast promiscuous mode + */ + count = igb_write_uc_addr_list(netdev); + if (count < 0) { + rctl |= E1000_RCTL_UPE; + vmolr |= E1000_VMOLR_ROPE; + } + + /* enable VLAN filtering by default */ + rctl |= E1000_RCTL_VFE; + + /* disable VLAN filtering for modes that require it */ + if ((netdev->flags & IFF_PROMISC) || + (netdev->features & NETIF_F_RXALL)) { + /* if we fail to set all rules then just clear VFE */ + if (igb_vlan_promisc_enable(adapter)) + rctl &= ~E1000_RCTL_VFE; + } else { + igb_vlan_promisc_disable(adapter); + } + + /* update state of unicast, multicast, and VLAN filtering modes */ + rctl |= rd32(E1000_RCTL) & ~(E1000_RCTL_UPE | E1000_RCTL_MPE | + E1000_RCTL_VFE); wr32(E1000_RCTL, rctl); /* In order to support SR-IOV and eventually VMDq it is necessary to set @@ -4071,9 +4228,19 @@ static void igb_set_rx_mode(struct net_device *netdev) if ((hw->mac.type < e1000_82576) || (hw->mac.type > e1000_i350)) return; + /* set UTA to appropriate mode */ + igb_set_uta(adapter, !!(vmolr & E1000_VMOLR_ROPE)); + vmolr |= rd32(E1000_VMOLR(vfn)) & ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE); + + /* enable Rx jumbo frames, no need for restriction */ + vmolr &= ~E1000_VMOLR_RLPML_MASK; + vmolr |= MAX_JUMBO_FRAME_SIZE | E1000_VMOLR_LPE; + wr32(E1000_VMOLR(vfn), vmolr); + wr32(E1000_RLPML, MAX_JUMBO_FRAME_SIZE); + igb_restore_vf_multicasts(adapter); } @@ -4227,6 +4394,7 @@ static void igb_watchdog_task(struct work_struct *work) u32 link; int i; u32 connsw; + u16 phy_data, retry_count = 20; link = igb_has_link(adapter); @@ -4305,6 +4473,25 @@ static void igb_watchdog_task(struct work_struct *work) break; } + if (adapter->link_speed != SPEED_1000) + goto no_wait; + + /* wait for Remote receiver status OK */ +retry_read_status: + if (!igb_read_phy_reg(hw, PHY_1000T_STATUS, + &phy_data)) { + if (!(phy_data & SR_1000T_REMOTE_RX_STATUS) && + retry_count) { + msleep(100); + retry_count--; + goto retry_read_status; + } else if (!retry_count) { + dev_err(&adapter->pdev->dev, "exceed max 2 second\n"); + } + } else { + dev_err(&adapter->pdev->dev, "read 1000Base-T Status Reg\n"); + } +no_wait: netif_carrier_on(netdev); igb_ping_all_vfs(adapter); @@ -4713,70 +4900,57 @@ static int igb_tso(struct igb_ring *tx_ring, return 1; } +static inline bool igb_ipv6_csum_is_sctp(struct sk_buff *skb) +{ + unsigned int offset = 0; + + ipv6_find_hdr(skb, &offset, IPPROTO_SCTP, NULL, NULL); + + return offset == skb_checksum_start_offset(skb); +} + static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first) { struct sk_buff *skb = first->skb; u32 vlan_macip_lens = 0; - u32 mss_l4len_idx = 0; u32 type_tucmd = 0; if (skb->ip_summed != CHECKSUM_PARTIAL) { +csum_failed: if (!(first->tx_flags & IGB_TX_FLAGS_VLAN)) return; - } else { - u8 l4_hdr = 0; - - switch (first->protocol) { - case htons(ETH_P_IP): - vlan_macip_lens |= skb_network_header_len(skb); - type_tucmd |= E1000_ADVTXD_TUCMD_IPV4; - l4_hdr = ip_hdr(skb)->protocol; - break; - case htons(ETH_P_IPV6): - vlan_macip_lens |= skb_network_header_len(skb); - l4_hdr = ipv6_hdr(skb)->nexthdr; - break; - default: - if (unlikely(net_ratelimit())) { - dev_warn(tx_ring->dev, - "partial checksum but proto=%x!\n", - first->protocol); - } - break; - } + goto no_csum; + } - switch (l4_hdr) { - case IPPROTO_TCP: - type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP; - mss_l4len_idx = tcp_hdrlen(skb) << - E1000_ADVTXD_L4LEN_SHIFT; - break; - case IPPROTO_SCTP: - type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP; - mss_l4len_idx = sizeof(struct sctphdr) << - E1000_ADVTXD_L4LEN_SHIFT; - break; - case IPPROTO_UDP: - mss_l4len_idx = sizeof(struct udphdr) << - E1000_ADVTXD_L4LEN_SHIFT; - break; - default: - if (unlikely(net_ratelimit())) { - dev_warn(tx_ring->dev, - "partial checksum but l4 proto=%x!\n", - l4_hdr); - } + switch (skb->csum_offset) { + case offsetof(struct tcphdr, check): + type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP; + /* fall through */ + case offsetof(struct udphdr, check): + break; + case offsetof(struct sctphdr, checksum): + /* validate that this is actually an SCTP request */ + if (((first->protocol == htons(ETH_P_IP)) && + (ip_hdr(skb)->protocol == IPPROTO_SCTP)) || + ((first->protocol == htons(ETH_P_IPV6)) && + igb_ipv6_csum_is_sctp(skb))) { + type_tucmd = E1000_ADVTXD_TUCMD_L4T_SCTP; break; } - - /* update TX checksum flag */ - first->tx_flags |= IGB_TX_FLAGS_CSUM; + default: + skb_checksum_help(skb); + goto csum_failed; } + /* update TX checksum flag */ + first->tx_flags |= IGB_TX_FLAGS_CSUM; + vlan_macip_lens = skb_checksum_start_offset(skb) - + skb_network_offset(skb); +no_csum: vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT; vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK; - igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx); + igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, 0); } #define IGB_SET_FLAG(_input, _flag, _result) \ @@ -5088,16 +5262,6 @@ static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, { struct igb_adapter *adapter = netdev_priv(netdev); - if (test_bit(__IGB_DOWN, &adapter->state)) { - dev_kfree_skb_any(skb); - return NETDEV_TX_OK; - } - - if (skb->len <= 0) { - dev_kfree_skb_any(skb); - return NETDEV_TX_OK; - } - /* The minimum packet size with TCTL.PSP set is 17 so pad the skb * in order to meet this minimum size requirement. */ @@ -5792,125 +5956,132 @@ static void igb_restore_vf_multicasts(struct igb_adapter *adapter) static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf) { struct e1000_hw *hw = &adapter->hw; - u32 pool_mask, reg, vid; - int i; + u32 pool_mask, vlvf_mask, i; - pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf); + /* create mask for VF and other pools */ + pool_mask = E1000_VLVF_POOLSEL_MASK; + vlvf_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf); + + /* drop PF from pool bits */ + pool_mask &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + + adapter->vfs_allocated_count)); /* Find the vlan filter for this id */ - for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) { - reg = rd32(E1000_VLVF(i)); + for (i = E1000_VLVF_ARRAY_SIZE; i--;) { + u32 vlvf = rd32(E1000_VLVF(i)); + u32 vfta_mask, vid, vfta; /* remove the vf from the pool */ - reg &= ~pool_mask; - - /* if pool is empty then remove entry from vfta */ - if (!(reg & E1000_VLVF_POOLSEL_MASK) && - (reg & E1000_VLVF_VLANID_ENABLE)) { - reg = 0; - vid = reg & E1000_VLVF_VLANID_MASK; - igb_vfta_set(hw, vid, false); - } + if (!(vlvf & vlvf_mask)) + continue; + + /* clear out bit from VLVF */ + vlvf ^= vlvf_mask; + + /* if other pools are present, just remove ourselves */ + if (vlvf & pool_mask) + goto update_vlvfb; - wr32(E1000_VLVF(i), reg); + /* if PF is present, leave VFTA */ + if (vlvf & E1000_VLVF_POOLSEL_MASK) + goto update_vlvf; + + vid = vlvf & E1000_VLVF_VLANID_MASK; + vfta_mask = 1 << (vid % 32); + + /* clear bit from VFTA */ + vfta = adapter->shadow_vfta[vid / 32]; + if (vfta & vfta_mask) + hw->mac.ops.write_vfta(hw, vid / 32, vfta ^ vfta_mask); +update_vlvf: + /* clear pool selection enable */ + if (adapter->flags & IGB_FLAG_VLAN_PROMISC) + vlvf &= E1000_VLVF_POOLSEL_MASK; + else + vlvf = 0; +update_vlvfb: + /* clear pool bits */ + wr32(E1000_VLVF(i), vlvf); } +} - adapter->vf_data[vf].vlans_enabled = 0; +static int igb_find_vlvf_entry(struct e1000_hw *hw, u32 vlan) +{ + u32 vlvf; + int idx; + + /* short cut the special case */ + if (vlan == 0) + return 0; + + /* Search for the VLAN id in the VLVF entries */ + for (idx = E1000_VLVF_ARRAY_SIZE; --idx;) { + vlvf = rd32(E1000_VLVF(idx)); + if ((vlvf & VLAN_VID_MASK) == vlan) + break; + } + + return idx; } -static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf) +void igb_update_pf_vlvf(struct igb_adapter *adapter, u32 vid) { struct e1000_hw *hw = &adapter->hw; - u32 reg, i; - - /* The vlvf table only exists on 82576 hardware and newer */ - if (hw->mac.type < e1000_82576) - return -1; + u32 bits, pf_id; + int idx; - /* we only need to do this if VMDq is enabled */ - if (!adapter->vfs_allocated_count) - return -1; + idx = igb_find_vlvf_entry(hw, vid); + if (!idx) + return; - /* Find the vlan filter for this id */ - for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) { - reg = rd32(E1000_VLVF(i)); - if ((reg & E1000_VLVF_VLANID_ENABLE) && - vid == (reg & E1000_VLVF_VLANID_MASK)) - break; + /* See if any other pools are set for this VLAN filter + * entry other than the PF. + */ + pf_id = adapter->vfs_allocated_count + E1000_VLVF_POOLSEL_SHIFT; + bits = ~(1 << pf_id) & E1000_VLVF_POOLSEL_MASK; + bits &= rd32(E1000_VLVF(idx)); + + /* Disable the filter so this falls into the default pool. */ + if (!bits) { + if (adapter->flags & IGB_FLAG_VLAN_PROMISC) + wr32(E1000_VLVF(idx), 1 << pf_id); + else + wr32(E1000_VLVF(idx), 0); } +} - if (add) { - if (i == E1000_VLVF_ARRAY_SIZE) { - /* Did not find a matching VLAN ID entry that was - * enabled. Search for a free filter entry, i.e. - * one without the enable bit set - */ - for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) { - reg = rd32(E1000_VLVF(i)); - if (!(reg & E1000_VLVF_VLANID_ENABLE)) - break; - } - } - if (i < E1000_VLVF_ARRAY_SIZE) { - /* Found an enabled/available entry */ - reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf); - - /* if !enabled we need to set this up in vfta */ - if (!(reg & E1000_VLVF_VLANID_ENABLE)) { - /* add VID to filter table */ - igb_vfta_set(hw, vid, true); - reg |= E1000_VLVF_VLANID_ENABLE; - } - reg &= ~E1000_VLVF_VLANID_MASK; - reg |= vid; - wr32(E1000_VLVF(i), reg); - - /* do not modify RLPML for PF devices */ - if (vf >= adapter->vfs_allocated_count) - return 0; - - if (!adapter->vf_data[vf].vlans_enabled) { - u32 size; - - reg = rd32(E1000_VMOLR(vf)); - size = reg & E1000_VMOLR_RLPML_MASK; - size += 4; - reg &= ~E1000_VMOLR_RLPML_MASK; - reg |= size; - wr32(E1000_VMOLR(vf), reg); - } +static s32 igb_set_vf_vlan(struct igb_adapter *adapter, u32 vid, + bool add, u32 vf) +{ + int pf_id = adapter->vfs_allocated_count; + struct e1000_hw *hw = &adapter->hw; + int err; - adapter->vf_data[vf].vlans_enabled++; - } - } else { - if (i < E1000_VLVF_ARRAY_SIZE) { - /* remove vf from the pool */ - reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf)); - /* if pool is empty then remove entry from vfta */ - if (!(reg & E1000_VLVF_POOLSEL_MASK)) { - reg = 0; - igb_vfta_set(hw, vid, false); - } - wr32(E1000_VLVF(i), reg); - - /* do not modify RLPML for PF devices */ - if (vf >= adapter->vfs_allocated_count) - return 0; - - adapter->vf_data[vf].vlans_enabled--; - if (!adapter->vf_data[vf].vlans_enabled) { - u32 size; - - reg = rd32(E1000_VMOLR(vf)); - size = reg & E1000_VMOLR_RLPML_MASK; - size -= 4; - reg &= ~E1000_VMOLR_RLPML_MASK; - reg |= size; - wr32(E1000_VMOLR(vf), reg); - } - } + /* If VLAN overlaps with one the PF is currently monitoring make + * sure that we are able to allocate a VLVF entry. This may be + * redundant but it guarantees PF will maintain visibility to + * the VLAN. + */ + if (add && test_bit(vid, adapter->active_vlans)) { + err = igb_vfta_set(hw, vid, pf_id, true, false); + if (err) + return err; } - return 0; + + err = igb_vfta_set(hw, vid, vf, add, false); + + if (add && !err) + return err; + + /* If we failed to add the VF VLAN or we are removing the VF VLAN + * we may need to drop the PF pool bit in order to allow us to free + * up the VLVF resources. + */ + if (test_bit(vid, adapter->active_vlans) || + (adapter->flags & IGB_FLAG_VLAN_PROMISC)) + igb_update_pf_vlvf(adapter, vid); + + return err; } static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf) @@ -5923,130 +6094,104 @@ static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf) wr32(E1000_VMVIR(vf), 0); } -static int igb_ndo_set_vf_vlan(struct net_device *netdev, - int vf, u16 vlan, u8 qos) +static int igb_enable_port_vlan(struct igb_adapter *adapter, int vf, + u16 vlan, u8 qos) { - int err = 0; - struct igb_adapter *adapter = netdev_priv(netdev); + int err; - if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7)) - return -EINVAL; - if (vlan || qos) { - err = igb_vlvf_set(adapter, vlan, !!vlan, vf); - if (err) - goto out; - igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf); - igb_set_vmolr(adapter, vf, !vlan); - adapter->vf_data[vf].pf_vlan = vlan; - adapter->vf_data[vf].pf_qos = qos; - dev_info(&adapter->pdev->dev, - "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf); - if (test_bit(__IGB_DOWN, &adapter->state)) { - dev_warn(&adapter->pdev->dev, - "The VF VLAN has been set, but the PF device is not up.\n"); - dev_warn(&adapter->pdev->dev, - "Bring the PF device up before attempting to use the VF device.\n"); - } - } else { - igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan, - false, vf); - igb_set_vmvir(adapter, vlan, vf); - igb_set_vmolr(adapter, vf, true); - adapter->vf_data[vf].pf_vlan = 0; - adapter->vf_data[vf].pf_qos = 0; + err = igb_set_vf_vlan(adapter, vlan, true, vf); + if (err) + return err; + + igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf); + igb_set_vmolr(adapter, vf, !vlan); + + /* revoke access to previous VLAN */ + if (vlan != adapter->vf_data[vf].pf_vlan) + igb_set_vf_vlan(adapter, adapter->vf_data[vf].pf_vlan, + false, vf); + + adapter->vf_data[vf].pf_vlan = vlan; + adapter->vf_data[vf].pf_qos = qos; + igb_set_vf_vlan_strip(adapter, vf, true); + dev_info(&adapter->pdev->dev, + "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf); + if (test_bit(__IGB_DOWN, &adapter->state)) { + dev_warn(&adapter->pdev->dev, + "The VF VLAN has been set, but the PF device is not up.\n"); + dev_warn(&adapter->pdev->dev, + "Bring the PF device up before attempting to use the VF device.\n"); } -out: + return err; } -static int igb_find_vlvf_entry(struct igb_adapter *adapter, int vid) +static int igb_disable_port_vlan(struct igb_adapter *adapter, int vf) { - struct e1000_hw *hw = &adapter->hw; - int i; - u32 reg; + /* Restore tagless access via VLAN 0 */ + igb_set_vf_vlan(adapter, 0, true, vf); - /* Find the vlan filter for this id */ - for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) { - reg = rd32(E1000_VLVF(i)); - if ((reg & E1000_VLVF_VLANID_ENABLE) && - vid == (reg & E1000_VLVF_VLANID_MASK)) - break; - } + igb_set_vmvir(adapter, 0, vf); + igb_set_vmolr(adapter, vf, true); + + /* Remove any PF assigned VLAN */ + if (adapter->vf_data[vf].pf_vlan) + igb_set_vf_vlan(adapter, adapter->vf_data[vf].pf_vlan, + false, vf); - if (i >= E1000_VLVF_ARRAY_SIZE) - i = -1; + adapter->vf_data[vf].pf_vlan = 0; + adapter->vf_data[vf].pf_qos = 0; + igb_set_vf_vlan_strip(adapter, vf, false); - return i; + return 0; } -static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf) +static int igb_ndo_set_vf_vlan(struct net_device *netdev, + int vf, u16 vlan, u8 qos) { - struct e1000_hw *hw = &adapter->hw; - int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT; - int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK); - int err = 0; + struct igb_adapter *adapter = netdev_priv(netdev); - /* If in promiscuous mode we need to make sure the PF also has - * the VLAN filter set. - */ - if (add && (adapter->netdev->flags & IFF_PROMISC)) - err = igb_vlvf_set(adapter, vid, add, - adapter->vfs_allocated_count); - if (err) - goto out; + if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7)) + return -EINVAL; - err = igb_vlvf_set(adapter, vid, add, vf); + return (vlan || qos) ? igb_enable_port_vlan(adapter, vf, vlan, qos) : + igb_disable_port_vlan(adapter, vf); +} - if (err) - goto out; +static int igb_set_vf_vlan_msg(struct igb_adapter *adapter, u32 *msgbuf, u32 vf) +{ + int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT; + int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK); + int ret; - /* Go through all the checks to see if the VLAN filter should - * be wiped completely. - */ - if (!add && (adapter->netdev->flags & IFF_PROMISC)) { - u32 vlvf, bits; - int regndx = igb_find_vlvf_entry(adapter, vid); - - if (regndx < 0) - goto out; - /* See if any other pools are set for this VLAN filter - * entry other than the PF. - */ - vlvf = bits = rd32(E1000_VLVF(regndx)); - bits &= 1 << (E1000_VLVF_POOLSEL_SHIFT + - adapter->vfs_allocated_count); - /* If the filter was removed then ensure PF pool bit - * is cleared if the PF only added itself to the pool - * because the PF is in promiscuous mode. - */ - if ((vlvf & VLAN_VID_MASK) == vid && - !test_bit(vid, adapter->active_vlans) && - !bits) - igb_vlvf_set(adapter, vid, add, - adapter->vfs_allocated_count); - } + if (adapter->vf_data[vf].pf_vlan) + return -1; -out: - return err; + /* VLAN 0 is a special case, don't allow it to be removed */ + if (!vid && !add) + return 0; + + ret = igb_set_vf_vlan(adapter, vid, !!add, vf); + if (!ret) + igb_set_vf_vlan_strip(adapter, vf, !!vid); + return ret; } static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf) { - /* clear flags - except flag that indicates PF has set the MAC */ - adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC; - adapter->vf_data[vf].last_nack = jiffies; + struct vf_data_storage *vf_data = &adapter->vf_data[vf]; - /* reset offloads to defaults */ - igb_set_vmolr(adapter, vf, true); + /* clear flags - except flag that indicates PF has set the MAC */ + vf_data->flags &= IGB_VF_FLAG_PF_SET_MAC; + vf_data->last_nack = jiffies; /* reset vlans for device */ igb_clear_vf_vfta(adapter, vf); - if (adapter->vf_data[vf].pf_vlan) - igb_ndo_set_vf_vlan(adapter->netdev, vf, - adapter->vf_data[vf].pf_vlan, - adapter->vf_data[vf].pf_qos); - else - igb_clear_vf_vfta(adapter, vf); + igb_set_vf_vlan(adapter, vf_data->pf_vlan, true, vf); + igb_set_vmvir(adapter, vf_data->pf_vlan | + (vf_data->pf_qos << VLAN_PRIO_SHIFT), vf); + igb_set_vmolr(adapter, vf, !vf_data->pf_vlan); + igb_set_vf_vlan_strip(adapter, vf, !!(vf_data->pf_vlan)); /* reset multicast table array for vf */ adapter->vf_data[vf].num_vf_mc_hashes = 0; @@ -6191,7 +6336,7 @@ static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf) "VF %d attempted to override administratively set VLAN tag\nReload the VF driver to resume operations\n", vf); else - retval = igb_set_vf_vlan(adapter, msgbuf, vf); + retval = igb_set_vf_vlan_msg(adapter, msgbuf, vf); break; default: dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]); @@ -6233,6 +6378,7 @@ static void igb_msg_task(struct igb_adapter *adapter) /** * igb_set_uta - Set unicast filter table address * @adapter: board private structure + * @set: boolean indicating if we are setting or clearing bits * * The unicast table address is a register array of 32-bit registers. * The table is meant to be used in a way similar to how the MTA is used @@ -6240,21 +6386,18 @@ static void igb_msg_task(struct igb_adapter *adapter) * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous * enable bit to allow vlan tag stripping when promiscuous mode is enabled **/ -static void igb_set_uta(struct igb_adapter *adapter) +static void igb_set_uta(struct igb_adapter *adapter, bool set) { struct e1000_hw *hw = &adapter->hw; + u32 uta = set ? ~0 : 0; int i; - /* The UTA table only exists on 82576 hardware and newer */ - if (hw->mac.type < e1000_82576) - return; - /* we only need to do this if VMDq is enabled */ if (!adapter->vfs_allocated_count) return; - for (i = 0; i < hw->mac.uta_reg_count; i++) - array_wr32(E1000_UTA, i, ~0); + for (i = hw->mac.uta_reg_count; i--;) + array_wr32(E1000_UTA, i, uta); } /** @@ -6630,7 +6773,7 @@ static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer, /* Even if we own the page, we are not allowed to use atomic_set() * This would break get_page_unless_zero() users. */ - atomic_inc(&page->_count); + page_ref_inc(page); return true; } @@ -7202,7 +7345,7 @@ static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features) wr32(E1000_CTRL, ctrl); } - igb_rlpml_set(adapter); + igb_set_vf_vlan_strip(adapter, adapter->vfs_allocated_count, enable); } static int igb_vlan_rx_add_vid(struct net_device *netdev, @@ -7212,11 +7355,9 @@ static int igb_vlan_rx_add_vid(struct net_device *netdev, struct e1000_hw *hw = &adapter->hw; int pf_id = adapter->vfs_allocated_count; - /* attempt to add filter to vlvf array */ - igb_vlvf_set(adapter, vid, true, pf_id); - /* add the filter since PF can receive vlans w/o entry in vlvf */ - igb_vfta_set(hw, vid, true); + if (!vid || !(adapter->flags & IGB_FLAG_VLAN_PROMISC)) + igb_vfta_set(hw, vid, pf_id, true, !!vid); set_bit(vid, adapter->active_vlans); @@ -7227,16 +7368,12 @@ static int igb_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid) { struct igb_adapter *adapter = netdev_priv(netdev); - struct e1000_hw *hw = &adapter->hw; int pf_id = adapter->vfs_allocated_count; - s32 err; - - /* remove vlan from VLVF table array */ - err = igb_vlvf_set(adapter, vid, false, pf_id); + struct e1000_hw *hw = &adapter->hw; - /* if vid was not present in VLVF just remove it from table */ - if (err) - igb_vfta_set(hw, vid, false); + /* remove VID from filter table */ + if (vid && !(adapter->flags & IGB_FLAG_VLAN_PROMISC)) + igb_vfta_set(hw, vid, pf_id, false, true); clear_bit(vid, adapter->active_vlans); @@ -7245,11 +7382,12 @@ static int igb_vlan_rx_kill_vid(struct net_device *netdev, static void igb_restore_vlan(struct igb_adapter *adapter) { - u16 vid; + u16 vid = 1; igb_vlan_mode(adapter->netdev, adapter->netdev->features); + igb_vlan_rx_add_vid(adapter->netdev, htons(ETH_P_8021Q), 0); - for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID) + for_each_set_bit_from(vid, adapter->active_vlans, VLAN_N_VID) igb_vlan_rx_add_vid(adapter->netdev, htons(ETH_P_8021Q), vid); } @@ -7704,15 +7842,14 @@ static void igb_io_resume(struct pci_dev *pdev) static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index, u8 qsel) { - u32 rar_low, rar_high; struct e1000_hw *hw = &adapter->hw; + u32 rar_low, rar_high; /* HW expects these in little endian so we reverse the byte order - * from network order (big endian) to little endian + * from network order (big endian) to CPU endian */ - rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) | - ((u32) addr[2] << 16) | ((u32) addr[3] << 24)); - rar_high = ((u32) addr[4] | ((u32) addr[5] << 8)); + rar_low = le32_to_cpup((__be32 *)(addr)); + rar_high = le16_to_cpup((__be16 *)(addr + 4)); /* Indicate to hardware the Address is Valid. */ rar_high |= E1000_RAH_AV; @@ -7959,9 +8096,7 @@ static void igb_init_dmac(struct igb_adapter *adapter, u32 pba) * than the Rx threshold. Set hwm to PBA - max frame * size in 16B units, capping it at PBA - 6KB. */ - hwm = 64 * pba - adapter->max_frame_size / 16; - if (hwm < 64 * (pba - 6)) - hwm = 64 * (pba - 6); + hwm = 64 * (pba - 6); reg = rd32(E1000_FCRTC); reg &= ~E1000_FCRTC_RTH_COAL_MASK; reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT) @@ -7971,9 +8106,7 @@ static void igb_init_dmac(struct igb_adapter *adapter, u32 pba) /* Set the DMA Coalescing Rx threshold to PBA - 2 * max * frame size, capping it at PBA - 10KB. */ - dmac_thr = pba - adapter->max_frame_size / 512; - if (dmac_thr < pba - 10) - dmac_thr = pba - 10; + dmac_thr = pba - 10; reg = rd32(E1000_DMACR); reg &= ~E1000_DMACR_DMACTHR_MASK; reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT) diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index c44df87c3..22a8a2989 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -525,7 +525,8 @@ static int igb_ptp_feature_enable_i210(struct ptp_clock_info *ptp, ts.tv_nsec = rq->perout.period.nsec; ns = timespec64_to_ns(&ts); ns = ns >> 1; - if (on && ns <= 70000000LL) { + if (on && ((ns <= 70000000LL) || (ns == 125000000LL) || + (ns == 250000000LL) || (ns == 500000000LL))) { if (ns < 8LL) return -EINVAL; use_freq = 1; diff --git a/drivers/net/ethernet/intel/igbvf/mbx.c b/drivers/net/ethernet/intel/igbvf/mbx.c index 7b6cb4c37..01752f44a 100644 --- a/drivers/net/ethernet/intel/igbvf/mbx.c +++ b/drivers/net/ethernet/intel/igbvf/mbx.c @@ -234,13 +234,19 @@ static s32 e1000_check_for_rst_vf(struct e1000_hw *hw) static s32 e1000_obtain_mbx_lock_vf(struct e1000_hw *hw) { s32 ret_val = -E1000_ERR_MBX; - - /* Take ownership of the buffer */ - ew32(V2PMAILBOX(0), E1000_V2PMAILBOX_VFU); - - /* reserve mailbox for VF use */ - if (e1000_read_v2p_mailbox(hw) & E1000_V2PMAILBOX_VFU) - ret_val = E1000_SUCCESS; + int count = 10; + + do { + /* Take ownership of the buffer */ + ew32(V2PMAILBOX(0), E1000_V2PMAILBOX_VFU); + + /* reserve mailbox for VF use */ + if (e1000_read_v2p_mailbox(hw) & E1000_V2PMAILBOX_VFU) { + ret_val = 0; + break; + } + udelay(1000); + } while (count-- > 0); return ret_val; } diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index 297af801f..c12442252 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -43,6 +43,7 @@ #include <linux/ethtool.h> #include <linux/if_vlan.h> #include <linux/prefetch.h> +#include <linux/sctp.h> #include "igbvf.h" @@ -876,7 +877,6 @@ static irqreturn_t igbvf_msix_other(int irq, void *data) adapter->int_counter1++; - netif_carrier_off(netdev); hw->mac.get_link_status = 1; if (!test_bit(__IGBVF_DOWN, &adapter->state)) mod_timer(&adapter->watchdog_timer, jiffies + 1); @@ -1908,6 +1908,31 @@ static void igbvf_watchdog_task(struct work_struct *work) #define IGBVF_TX_FLAGS_VLAN_MASK 0xffff0000 #define IGBVF_TX_FLAGS_VLAN_SHIFT 16 +static void igbvf_tx_ctxtdesc(struct igbvf_ring *tx_ring, u32 vlan_macip_lens, + u32 type_tucmd, u32 mss_l4len_idx) +{ + struct e1000_adv_tx_context_desc *context_desc; + struct igbvf_buffer *buffer_info; + u16 i = tx_ring->next_to_use; + + context_desc = IGBVF_TX_CTXTDESC_ADV(*tx_ring, i); + buffer_info = &tx_ring->buffer_info[i]; + + i++; + tx_ring->next_to_use = (i < tx_ring->count) ? i : 0; + + /* set bits to identify this as an advanced context descriptor */ + type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT; + + context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens); + context_desc->seqnum_seed = 0; + context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd); + context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx); + + buffer_info->time_stamp = jiffies; + buffer_info->dma = 0; +} + static int igbvf_tso(struct igbvf_adapter *adapter, struct igbvf_ring *tx_ring, struct sk_buff *skb, u32 tx_flags, u8 *hdr_len, @@ -1987,65 +2012,56 @@ static int igbvf_tso(struct igbvf_adapter *adapter, return true; } -static inline bool igbvf_tx_csum(struct igbvf_adapter *adapter, - struct igbvf_ring *tx_ring, - struct sk_buff *skb, u32 tx_flags, - __be16 protocol) +static inline bool igbvf_ipv6_csum_is_sctp(struct sk_buff *skb) { - struct e1000_adv_tx_context_desc *context_desc; - unsigned int i; - struct igbvf_buffer *buffer_info; - u32 info = 0, tu_cmd = 0; - - if ((skb->ip_summed == CHECKSUM_PARTIAL) || - (tx_flags & IGBVF_TX_FLAGS_VLAN)) { - i = tx_ring->next_to_use; - buffer_info = &tx_ring->buffer_info[i]; - context_desc = IGBVF_TX_CTXTDESC_ADV(*tx_ring, i); + unsigned int offset = 0; - if (tx_flags & IGBVF_TX_FLAGS_VLAN) - info |= (tx_flags & IGBVF_TX_FLAGS_VLAN_MASK); + ipv6_find_hdr(skb, &offset, IPPROTO_SCTP, NULL, NULL); - info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT); - if (skb->ip_summed == CHECKSUM_PARTIAL) - info |= (skb_transport_header(skb) - - skb_network_header(skb)); + return offset == skb_checksum_start_offset(skb); +} - context_desc->vlan_macip_lens = cpu_to_le32(info); +static bool igbvf_tx_csum(struct igbvf_ring *tx_ring, struct sk_buff *skb, + u32 tx_flags, __be16 protocol) +{ + u32 vlan_macip_lens = 0; + u32 type_tucmd = 0; - tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT); + if (skb->ip_summed != CHECKSUM_PARTIAL) { +csum_failed: + if (!(tx_flags & IGBVF_TX_FLAGS_VLAN)) + return false; + goto no_csum; + } - if (skb->ip_summed == CHECKSUM_PARTIAL) { - switch (protocol) { - case htons(ETH_P_IP): - tu_cmd |= E1000_ADVTXD_TUCMD_IPV4; - if (ip_hdr(skb)->protocol == IPPROTO_TCP) - tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP; - break; - case htons(ETH_P_IPV6): - if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP) - tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP; - break; - default: - break; - } + switch (skb->csum_offset) { + case offsetof(struct tcphdr, check): + type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP; + /* fall through */ + case offsetof(struct udphdr, check): + break; + case offsetof(struct sctphdr, checksum): + /* validate that this is actually an SCTP request */ + if (((protocol == htons(ETH_P_IP)) && + (ip_hdr(skb)->protocol == IPPROTO_SCTP)) || + ((protocol == htons(ETH_P_IPV6)) && + igbvf_ipv6_csum_is_sctp(skb))) { + type_tucmd = E1000_ADVTXD_TUCMD_L4T_SCTP; + break; } - - context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd); - context_desc->seqnum_seed = 0; - context_desc->mss_l4len_idx = 0; - - buffer_info->time_stamp = jiffies; - buffer_info->dma = 0; - i++; - if (i == tx_ring->count) - i = 0; - tx_ring->next_to_use = i; - - return true; + default: + skb_checksum_help(skb); + goto csum_failed; } - return false; + vlan_macip_lens = skb_checksum_start_offset(skb) - + skb_network_offset(skb); +no_csum: + vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT; + vlan_macip_lens |= tx_flags & IGBVF_TX_FLAGS_VLAN_MASK; + + igbvf_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, 0); + return true; } static int igbvf_maybe_stop_tx(struct net_device *netdev, int size) @@ -2264,7 +2280,7 @@ static netdev_tx_t igbvf_xmit_frame_ring_adv(struct sk_buff *skb, if (tso) tx_flags |= IGBVF_TX_FLAGS_TSO; - else if (igbvf_tx_csum(adapter, tx_ring, skb, tx_flags, protocol) && + else if (igbvf_tx_csum(tx_ring, skb, tx_flags, protocol) && (skb->ip_summed == CHECKSUM_PARTIAL)) tx_flags |= IGBVF_TX_FLAGS_CSUM; @@ -2717,11 +2733,11 @@ static int igbvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) adapter->bd_number = cards_found++; netdev->hw_features = NETIF_F_SG | - NETIF_F_IP_CSUM | - NETIF_F_IPV6_CSUM | - NETIF_F_TSO | - NETIF_F_TSO6 | - NETIF_F_RXCSUM; + NETIF_F_TSO | + NETIF_F_TSO6 | + NETIF_F_RXCSUM | + NETIF_F_HW_CSUM | + NETIF_F_SCTP_CRC; netdev->features = netdev->hw_features | NETIF_F_HW_VLAN_CTAG_TX | @@ -2731,11 +2747,14 @@ static int igbvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (pci_using_dac) netdev->features |= NETIF_F_HIGHDMA; - netdev->vlan_features |= NETIF_F_TSO; - netdev->vlan_features |= NETIF_F_TSO6; - netdev->vlan_features |= NETIF_F_IP_CSUM; - netdev->vlan_features |= NETIF_F_IPV6_CSUM; - netdev->vlan_features |= NETIF_F_SG; + netdev->vlan_features |= NETIF_F_SG | + NETIF_F_TSO | + NETIF_F_TSO6 | + NETIF_F_HW_CSUM | + NETIF_F_SCTP_CRC; + + netdev->mpls_features |= NETIF_F_HW_CSUM; + netdev->hw_enc_features |= NETIF_F_HW_CSUM; /*reset the controller to put the device in a known good state */ err = hw->mac.ops.reset_hw(hw); diff --git a/drivers/net/ethernet/intel/igbvf/vf.h b/drivers/net/ethernet/intel/igbvf/vf.h index 0f1eca639..f00a41d9a 100644 --- a/drivers/net/ethernet/intel/igbvf/vf.h +++ b/drivers/net/ethernet/intel/igbvf/vf.h @@ -126,6 +126,7 @@ struct e1000_adv_tx_context_desc { #define E1000_ADVTXD_MACLEN_SHIFT 9 /* Adv ctxt desc mac len shift */ #define E1000_ADVTXD_TUCMD_IPV4 0x00000400 /* IP Packet Type: 1=IPv4 */ #define E1000_ADVTXD_TUCMD_L4T_TCP 0x00000800 /* L4 Packet TYPE of TCP */ +#define E1000_ADVTXD_TUCMD_L4T_SCTP 0x00001000 /* L4 packet TYPE of SCTP */ #define E1000_ADVTXD_L4LEN_SHIFT 8 /* Adv ctxt L4LEN shift */ #define E1000_ADVTXD_MSS_SHIFT 16 /* Adv ctxt MSS shift */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 4b9156cd8..e4949af7d 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -661,9 +661,7 @@ struct ixgbe_adapter { #define IXGBE_FLAG2_RSS_FIELD_IPV6_UDP (u32)(1 << 9) #define IXGBE_FLAG2_PTP_PPS_ENABLED (u32)(1 << 10) #define IXGBE_FLAG2_PHY_INTERRUPT (u32)(1 << 11) -#ifdef CONFIG_IXGBE_VXLAN #define IXGBE_FLAG2_VXLAN_REREG_NEEDED BIT(12) -#endif #define IXGBE_FLAG2_VLAN_PROMISC BIT(13) /* Tx fast path data */ @@ -675,6 +673,9 @@ struct ixgbe_adapter { int num_rx_queues; u16 rx_itr_setting; + /* Port number used to identify VXLAN traffic */ + __be16 vxlan_port; + /* TX */ struct ixgbe_ring *tx_ring[MAX_TX_QUEUES] ____cacheline_aligned_in_smp; @@ -782,9 +783,6 @@ struct ixgbe_adapter { u32 timer_event_accumulator; u32 vferr_refcount; struct ixgbe_mac_addr *mac_table; -#ifdef CONFIG_IXGBE_VXLAN - u16 vxlan_port; -#endif struct kobject *info_kobj; #ifdef CONFIG_IXGBE_HWMON struct hwmon_buff *ixgbe_hwmon_buff; @@ -796,6 +794,10 @@ struct ixgbe_adapter { u8 default_up; unsigned long fwd_bitmask; /* Bitmask indicating in use pools */ +#define IXGBE_MAX_LINK_HANDLE 10 + struct ixgbe_mat_field *jump_tables[IXGBE_MAX_LINK_HANDLE]; + unsigned long tables; + /* maximum number of RETA entries among all devices supported by ixgbe * driver: currently it's x550 device in non-SRIOV mode */ @@ -875,6 +877,8 @@ extern const char ixgbe_driver_version[]; extern char ixgbe_default_device_descr[]; #endif /* IXGBE_FCOE */ +int ixgbe_open(struct net_device *netdev); +int ixgbe_close(struct net_device *netdev); void ixgbe_up(struct ixgbe_adapter *adapter); void ixgbe_down(struct ixgbe_adapter *adapter); void ixgbe_reinit_locked(struct ixgbe_adapter *adapter); @@ -925,6 +929,9 @@ s32 ixgbe_fdir_erase_perfect_filter_82599(struct ixgbe_hw *hw, u16 soft_id); void ixgbe_atr_compute_perfect_hash_82599(union ixgbe_atr_input *input, union ixgbe_atr_input *mask); +int ixgbe_update_ethtool_fdir_entry(struct ixgbe_adapter *adapter, + struct ixgbe_fdir_filter *input, + u16 sw_idx); void ixgbe_set_rx_mode(struct net_device *netdev); #ifdef CONFIG_IXGBE_DCB void ixgbe_set_rx_drop_en(struct ixgbe_adapter *adapter); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index bea96b3bc..b3530e1e3 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -2053,7 +2053,7 @@ static void ixgbe_diag_test(struct net_device *netdev, if (if_running) /* indicate we're in test mode */ - dev_close(netdev); + ixgbe_close(netdev); else ixgbe_reset(adapter); @@ -2091,7 +2091,7 @@ skip_loopback: /* clear testing bit and return adapter to previous state */ clear_bit(__IXGBE_TESTING, &adapter->state); if (if_running) - dev_open(netdev); + ixgbe_open(netdev); else if (hw->mac.ops.disable_tx_laser) hw->mac.ops.disable_tx_laser(hw); } else { @@ -2520,9 +2520,9 @@ static int ixgbe_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd, return ret; } -static int ixgbe_update_ethtool_fdir_entry(struct ixgbe_adapter *adapter, - struct ixgbe_fdir_filter *input, - u16 sw_idx) +int ixgbe_update_ethtool_fdir_entry(struct ixgbe_adapter *adapter, + struct ixgbe_fdir_filter *input, + u16 sw_idx) { struct ixgbe_hw *hw = &adapter->hw; struct hlist_node *node2; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index c4003a88b..7df3fe29b 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -51,6 +51,8 @@ #include <linux/prefetch.h> #include <scsi/fc/fc_fcoe.h> #include <net/vxlan.h> +#include <net/pkt_cls.h> +#include <net/tc_act/tc_gact.h> #ifdef CONFIG_OF #include <linux/of_net.h> @@ -65,6 +67,7 @@ #include "ixgbe_common.h" #include "ixgbe_dcb_82599.h" #include "ixgbe_sriov.h" +#include "ixgbe_model.h" char ixgbe_driver_name[] = "ixgbe"; static const char ixgbe_driver_string[] = @@ -1089,7 +1092,7 @@ static void ixgbe_tx_timeout_reset(struct ixgbe_adapter *adapter) * @tx_ring: tx ring to clean **/ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, - struct ixgbe_ring *tx_ring) + struct ixgbe_ring *tx_ring, int napi_budget) { struct ixgbe_adapter *adapter = q_vector->adapter; struct ixgbe_tx_buffer *tx_buffer; @@ -1127,7 +1130,7 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, total_packets += tx_buffer->gso_segs; /* free the skb */ - dev_consume_skb_any(tx_buffer->skb); + napi_consume_skb(tx_buffer->skb, napi_budget); /* unmap skb header data */ dma_unmap_single(tx_ring->dev, @@ -1942,7 +1945,7 @@ static bool ixgbe_add_rx_frag(struct ixgbe_ring *rx_ring, /* Even if we own the page, we are not allowed to use atomic_set() * This would break get_page_unless_zero() users. */ - atomic_inc(&page->_count); + page_ref_inc(page); return true; } @@ -2784,7 +2787,7 @@ int ixgbe_poll(struct napi_struct *napi, int budget) #endif ixgbe_for_each_ring(ring, q_vector->tx) - clean_complete &= !!ixgbe_clean_tx_irq(q_vector, ring); + clean_complete &= !!ixgbe_clean_tx_irq(q_vector, ring, budget); /* Exit if we are called by netpoll or busy polling is active */ if ((budget <= 0) || !ixgbe_qv_lock_napi(q_vector)) @@ -4528,9 +4531,7 @@ static void ixgbe_clear_vxlan_port(struct ixgbe_adapter *adapter) case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: IXGBE_WRITE_REG(&adapter->hw, IXGBE_VXLANCTRL, 0); -#ifdef CONFIG_IXGBE_VXLAN adapter->vxlan_port = 0; -#endif break; default: break; @@ -5545,6 +5546,9 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter) #endif /* CONFIG_IXGBE_DCB */ #endif /* IXGBE_FCOE */ + /* initialize static ixgbe jump table entries */ + adapter->jump_tables[0] = ixgbe_ipv4_fields; + adapter->mac_table = kzalloc(sizeof(struct ixgbe_mac_addr) * hw->mac.num_rar_entries, GFP_ATOMIC); @@ -5988,7 +5992,7 @@ static int ixgbe_change_mtu(struct net_device *netdev, int new_mtu) * handler is registered with the OS, the watchdog timer is started, * and the stack is notified that the interface is ready. **/ -static int ixgbe_open(struct net_device *netdev) +int ixgbe_open(struct net_device *netdev) { struct ixgbe_adapter *adapter = netdev_priv(netdev); struct ixgbe_hw *hw = &adapter->hw; @@ -6090,7 +6094,7 @@ static void ixgbe_close_suspend(struct ixgbe_adapter *adapter) * needs to be disabled. A global MAC reset is issued to stop the * hardware, and all transmit and receive resources are freed. **/ -static int ixgbe_close(struct net_device *netdev) +int ixgbe_close(struct net_device *netdev) { struct ixgbe_adapter *adapter = netdev_priv(netdev); @@ -7554,11 +7558,10 @@ static void ixgbe_atr(struct ixgbe_ring *ring, struct ipv6hdr *ipv6; } hdr; struct tcphdr *th; + unsigned int hlen; struct sk_buff *skb; -#ifdef CONFIG_IXGBE_VXLAN - u8 encap = false; -#endif /* CONFIG_IXGBE_VXLAN */ __be16 vlan_id; + int l4_proto; /* if ring doesn't have a interrupt vector, cannot perform ATR */ if (!q_vector) @@ -7570,62 +7573,50 @@ static void ixgbe_atr(struct ixgbe_ring *ring, ring->atr_count++; + /* currently only IPv4/IPv6 with TCP is supported */ + if ((first->protocol != htons(ETH_P_IP)) && + (first->protocol != htons(ETH_P_IPV6))) + return; + /* snag network header to get L4 type and address */ skb = first->skb; hdr.network = skb_network_header(skb); - if (!skb->encapsulation) { - th = tcp_hdr(skb); - } else { #ifdef CONFIG_IXGBE_VXLAN + if (skb->encapsulation && + first->protocol == htons(ETH_P_IP) && + hdr.ipv4->protocol != IPPROTO_UDP) { struct ixgbe_adapter *adapter = q_vector->adapter; - if (!adapter->vxlan_port) - return; - if (first->protocol != htons(ETH_P_IP) || - hdr.ipv4->version != IPVERSION || - hdr.ipv4->protocol != IPPROTO_UDP) { - return; - } - if (ntohs(udp_hdr(skb)->dest) != adapter->vxlan_port) - return; - encap = true; - hdr.network = skb_inner_network_header(skb); - th = inner_tcp_hdr(skb); -#else - return; -#endif /* CONFIG_IXGBE_VXLAN */ + /* verify the port is recognized as VXLAN */ + if (adapter->vxlan_port && + udp_hdr(skb)->dest == adapter->vxlan_port) + hdr.network = skb_inner_network_header(skb); } +#endif /* CONFIG_IXGBE_VXLAN */ /* Currently only IPv4/IPv6 with TCP is supported */ switch (hdr.ipv4->version) { case IPVERSION: - if (hdr.ipv4->protocol != IPPROTO_TCP) - return; + /* access ihl as u8 to avoid unaligned access on ia64 */ + hlen = (hdr.network[0] & 0x0F) << 2; + l4_proto = hdr.ipv4->protocol; break; case 6: - if (likely((unsigned char *)th - hdr.network == - sizeof(struct ipv6hdr))) { - if (hdr.ipv6->nexthdr != IPPROTO_TCP) - return; - } else { - __be16 frag_off; - u8 l4_hdr; - - ipv6_skip_exthdr(skb, hdr.network - skb->data + - sizeof(struct ipv6hdr), - &l4_hdr, &frag_off); - if (unlikely(frag_off)) - return; - if (l4_hdr != IPPROTO_TCP) - return; - } + hlen = hdr.network - skb->data; + l4_proto = ipv6_find_hdr(skb, &hlen, IPPROTO_TCP, NULL, NULL); + hlen -= hdr.network - skb->data; break; default: return; } - /* skip this packet since it is invalid or the socket is closing */ - if (!th || th->fin) + if (l4_proto != IPPROTO_TCP) + return; + + th = (struct tcphdr *)(hdr.network + hlen); + + /* skip this packet since the socket is closing */ + if (th->fin) return; /* sample on all syn packets or once every atr sample count */ @@ -7676,10 +7667,8 @@ static void ixgbe_atr(struct ixgbe_ring *ring, break; } -#ifdef CONFIG_IXGBE_VXLAN - if (encap) + if (hdr.network != skb_network_header(skb)) input.formatted.flow_type |= IXGBE_ATR_L4TYPE_TUNNEL_MASK; -#endif /* CONFIG_IXGBE_VXLAN */ /* This assumes the Rx queue and Tx queue are bound to the same CPU */ ixgbe_fdir_add_signature_filter_82599(&q_vector->adapter->hw, @@ -8200,6 +8189,240 @@ int ixgbe_setup_tc(struct net_device *dev, u8 tc) return 0; } +static int ixgbe_delete_clsu32(struct ixgbe_adapter *adapter, + struct tc_cls_u32_offload *cls) +{ + u32 uhtid = TC_U32_USERHTID(cls->knode.handle); + u32 loc; + int err; + + if ((uhtid != 0x800) && (uhtid >= IXGBE_MAX_LINK_HANDLE)) + return -EINVAL; + + loc = cls->knode.handle & 0xfffff; + + spin_lock(&adapter->fdir_perfect_lock); + err = ixgbe_update_ethtool_fdir_entry(adapter, NULL, loc); + spin_unlock(&adapter->fdir_perfect_lock); + return err; +} + +static int ixgbe_configure_clsu32_add_hnode(struct ixgbe_adapter *adapter, + __be16 protocol, + struct tc_cls_u32_offload *cls) +{ + u32 uhtid = TC_U32_USERHTID(cls->hnode.handle); + + if (uhtid >= IXGBE_MAX_LINK_HANDLE) + return -EINVAL; + + /* This ixgbe devices do not support hash tables at the moment + * so abort when given hash tables. + */ + if (cls->hnode.divisor > 0) + return -EINVAL; + + set_bit(uhtid - 1, &adapter->tables); + return 0; +} + +static int ixgbe_configure_clsu32_del_hnode(struct ixgbe_adapter *adapter, + struct tc_cls_u32_offload *cls) +{ + u32 uhtid = TC_U32_USERHTID(cls->hnode.handle); + + if (uhtid >= IXGBE_MAX_LINK_HANDLE) + return -EINVAL; + + clear_bit(uhtid - 1, &adapter->tables); + return 0; +} + +static int ixgbe_configure_clsu32(struct ixgbe_adapter *adapter, + __be16 protocol, + struct tc_cls_u32_offload *cls) +{ + u32 loc = cls->knode.handle & 0xfffff; + struct ixgbe_hw *hw = &adapter->hw; + struct ixgbe_mat_field *field_ptr; + struct ixgbe_fdir_filter *input; + union ixgbe_atr_input mask; +#ifdef CONFIG_NET_CLS_ACT + const struct tc_action *a; +#endif + int i, err = 0; + u8 queue; + u32 uhtid, link_uhtid; + + memset(&mask, 0, sizeof(union ixgbe_atr_input)); + uhtid = TC_U32_USERHTID(cls->knode.handle); + link_uhtid = TC_U32_USERHTID(cls->knode.link_handle); + + /* At the moment cls_u32 jumps to network layer and skips past + * L2 headers. The canonical method to match L2 frames is to use + * negative values. However this is error prone at best but really + * just broken because there is no way to "know" what sort of hdr + * is in front of the network layer. Fix cls_u32 to support L2 + * headers when needed. + */ + if (protocol != htons(ETH_P_IP)) + return -EINVAL; + + if (link_uhtid) { + struct ixgbe_nexthdr *nexthdr = ixgbe_ipv4_jumps; + + if (link_uhtid >= IXGBE_MAX_LINK_HANDLE) + return -EINVAL; + + if (!test_bit(link_uhtid - 1, &adapter->tables)) + return -EINVAL; + + for (i = 0; nexthdr[i].jump; i++) { + if (nexthdr->o != cls->knode.sel->offoff || + nexthdr->s != cls->knode.sel->offshift || + nexthdr->m != cls->knode.sel->offmask || + /* do not support multiple key jumps its just mad */ + cls->knode.sel->nkeys > 1) + return -EINVAL; + + if (nexthdr->off != cls->knode.sel->keys[0].off || + nexthdr->val != cls->knode.sel->keys[0].val || + nexthdr->mask != cls->knode.sel->keys[0].mask) + return -EINVAL; + + adapter->jump_tables[link_uhtid] = nexthdr->jump; + } + return 0; + } + + if (loc >= ((1024 << adapter->fdir_pballoc) - 2)) { + e_err(drv, "Location out of range\n"); + return -EINVAL; + } + + /* cls u32 is a graph starting at root node 0x800. The driver tracks + * links and also the fields used to advance the parser across each + * link (e.g. nexthdr/eat parameters from 'tc'). This way we can map + * the u32 graph onto the hardware parse graph denoted in ixgbe_model.h + * To add support for new nodes update ixgbe_model.h parse structures + * this function _should_ be generic try not to hardcode values here. + */ + if (uhtid == 0x800) { + field_ptr = adapter->jump_tables[0]; + } else { + if (uhtid >= IXGBE_MAX_LINK_HANDLE) + return -EINVAL; + + field_ptr = adapter->jump_tables[uhtid]; + } + + if (!field_ptr) + return -EINVAL; + + input = kzalloc(sizeof(*input), GFP_KERNEL); + if (!input) + return -ENOMEM; + + for (i = 0; i < cls->knode.sel->nkeys; i++) { + int off = cls->knode.sel->keys[i].off; + __be32 val = cls->knode.sel->keys[i].val; + __be32 m = cls->knode.sel->keys[i].mask; + bool found_entry = false; + int j; + + for (j = 0; field_ptr[j].val; j++) { + if (field_ptr[j].off == off) { + field_ptr[j].val(input, &mask, val, m); + input->filter.formatted.flow_type |= + field_ptr[j].type; + found_entry = true; + break; + } + } + + if (!found_entry) + goto err_out; + } + + mask.formatted.flow_type = IXGBE_ATR_L4TYPE_IPV6_MASK | + IXGBE_ATR_L4TYPE_MASK; + + if (input->filter.formatted.flow_type == IXGBE_ATR_FLOW_TYPE_IPV4) + mask.formatted.flow_type &= IXGBE_ATR_L4TYPE_IPV6_MASK; + +#ifdef CONFIG_NET_CLS_ACT + if (list_empty(&cls->knode.exts->actions)) + goto err_out; + + list_for_each_entry(a, &cls->knode.exts->actions, list) { + if (!is_tcf_gact_shot(a)) + goto err_out; + } +#endif + + input->action = IXGBE_FDIR_DROP_QUEUE; + queue = IXGBE_FDIR_DROP_QUEUE; + input->sw_idx = loc; + + spin_lock(&adapter->fdir_perfect_lock); + + if (hlist_empty(&adapter->fdir_filter_list)) { + memcpy(&adapter->fdir_mask, &mask, sizeof(mask)); + err = ixgbe_fdir_set_input_mask_82599(hw, &mask); + if (err) + goto err_out_w_lock; + } else if (memcmp(&adapter->fdir_mask, &mask, sizeof(mask))) { + err = -EINVAL; + goto err_out_w_lock; + } + + ixgbe_atr_compute_perfect_hash_82599(&input->filter, &mask); + err = ixgbe_fdir_write_perfect_filter_82599(hw, &input->filter, + input->sw_idx, queue); + if (!err) + ixgbe_update_ethtool_fdir_entry(adapter, input, input->sw_idx); + spin_unlock(&adapter->fdir_perfect_lock); + + return err; +err_out_w_lock: + spin_unlock(&adapter->fdir_perfect_lock); +err_out: + kfree(input); + return -EINVAL; +} + +static int __ixgbe_setup_tc(struct net_device *dev, u32 handle, __be16 proto, + struct tc_to_netdev *tc) +{ + struct ixgbe_adapter *adapter = netdev_priv(dev); + + if (TC_H_MAJ(handle) == TC_H_MAJ(TC_H_INGRESS) && + tc->type == TC_SETUP_CLSU32) { + switch (tc->cls_u32->command) { + case TC_CLSU32_NEW_KNODE: + case TC_CLSU32_REPLACE_KNODE: + return ixgbe_configure_clsu32(adapter, + proto, tc->cls_u32); + case TC_CLSU32_DELETE_KNODE: + return ixgbe_delete_clsu32(adapter, tc->cls_u32); + case TC_CLSU32_NEW_HNODE: + case TC_CLSU32_REPLACE_HNODE: + return ixgbe_configure_clsu32_add_hnode(adapter, proto, + tc->cls_u32); + case TC_CLSU32_DELETE_HNODE: + return ixgbe_configure_clsu32_del_hnode(adapter, + tc->cls_u32); + default: + return -EINVAL; + } + } + + if (tc->type != TC_SETUP_MQPRIO) + return -EINVAL; + + return ixgbe_setup_tc(dev, tc->tc); +} + #ifdef CONFIG_PCI_IOV void ixgbe_sriov_reinit(struct ixgbe_adapter *adapter) { @@ -8262,19 +8485,17 @@ static int ixgbe_set_features(struct net_device *netdev, } /* - * Check if Flow Director n-tuple support was enabled or disabled. If - * the state changed, we need to reset. + * Check if Flow Director n-tuple support or hw_tc support was + * enabled or disabled. If the state changed, we need to reset. */ - switch (features & NETIF_F_NTUPLE) { - case NETIF_F_NTUPLE: + if ((features & NETIF_F_NTUPLE) || (features & NETIF_F_HW_TC)) { /* turn off ATR, enable perfect filters and reset */ if (!(adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE)) need_reset = true; adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE; adapter->flags |= IXGBE_FLAG_FDIR_PERFECT_CAPABLE; - break; - default: + } else { /* turn off perfect filters, enable ATR and reset */ if (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE) need_reset = true; @@ -8282,23 +8503,16 @@ static int ixgbe_set_features(struct net_device *netdev, adapter->flags &= ~IXGBE_FLAG_FDIR_PERFECT_CAPABLE; /* We cannot enable ATR if SR-IOV is enabled */ - if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) - break; - - /* We cannot enable ATR if we have 2 or more traffic classes */ - if (netdev_get_num_tc(netdev) > 1) - break; - - /* We cannot enable ATR if RSS is disabled */ - if (adapter->ring_feature[RING_F_RSS].limit <= 1) - break; - - /* A sample rate of 0 indicates ATR disabled */ - if (!adapter->atr_sample_rate) - break; - - adapter->flags |= IXGBE_FLAG_FDIR_HASH_CAPABLE; - break; + if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED || + /* We cannot enable ATR if we have 2 or more tcs */ + (netdev_get_num_tc(netdev) > 1) || + /* We cannot enable ATR if RSS is disabled */ + (adapter->ring_feature[RING_F_RSS].limit <= 1) || + /* A sample rate of 0 indicates ATR disabled */ + (!adapter->atr_sample_rate)) + ; /* do nothing not supported */ + else /* otherwise supported and set the flag */ + adapter->flags |= IXGBE_FLAG_FDIR_HASH_CAPABLE; } if (features & NETIF_F_HW_VLAN_CTAG_RX) @@ -8338,7 +8552,6 @@ static void ixgbe_add_vxlan_port(struct net_device *dev, sa_family_t sa_family, { struct ixgbe_adapter *adapter = netdev_priv(dev); struct ixgbe_hw *hw = &adapter->hw; - u16 new_port = ntohs(port); if (!(adapter->flags & IXGBE_FLAG_VXLAN_OFFLOAD_CAPABLE)) return; @@ -8346,18 +8559,18 @@ static void ixgbe_add_vxlan_port(struct net_device *dev, sa_family_t sa_family, if (sa_family == AF_INET6) return; - if (adapter->vxlan_port == new_port) + if (adapter->vxlan_port == port) return; if (adapter->vxlan_port) { netdev_info(dev, "Hit Max num of VXLAN ports, not adding port %d\n", - new_port); + ntohs(port)); return; } - adapter->vxlan_port = new_port; - IXGBE_WRITE_REG(hw, IXGBE_VXLANCTRL, new_port); + adapter->vxlan_port = port; + IXGBE_WRITE_REG(hw, IXGBE_VXLANCTRL, ntohs(port)); } /** @@ -8370,7 +8583,6 @@ static void ixgbe_del_vxlan_port(struct net_device *dev, sa_family_t sa_family, __be16 port) { struct ixgbe_adapter *adapter = netdev_priv(dev); - u16 new_port = ntohs(port); if (!(adapter->flags & IXGBE_FLAG_VXLAN_OFFLOAD_CAPABLE)) return; @@ -8378,9 +8590,9 @@ static void ixgbe_del_vxlan_port(struct net_device *dev, sa_family_t sa_family, if (sa_family == AF_INET6) return; - if (adapter->vxlan_port != new_port) { + if (adapter->vxlan_port != port) { netdev_info(dev, "Port %d was not found, not deleting\n", - new_port); + ntohs(port)); return; } @@ -8657,9 +8869,7 @@ static const struct net_device_ops ixgbe_netdev_ops = { .ndo_set_vf_trust = ixgbe_ndo_set_vf_trust, .ndo_get_vf_config = ixgbe_ndo_get_vf_config, .ndo_get_stats64 = ixgbe_get_stats64, -#ifdef CONFIG_IXGBE_DCB - .ndo_setup_tc = ixgbe_setup_tc, -#endif + .ndo_setup_tc = __ixgbe_setup_tc, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = ixgbe_netpoll, #endif @@ -9030,7 +9240,8 @@ skip_sriov: case ixgbe_mac_X550EM_x: netdev->features |= NETIF_F_SCTP_CRC; netdev->hw_features |= NETIF_F_SCTP_CRC | - NETIF_F_NTUPLE; + NETIF_F_NTUPLE | + NETIF_F_HW_TC; break; default: break; @@ -9050,17 +9261,6 @@ skip_sriov: netdev->priv_flags |= IFF_UNICAST_FLT; netdev->priv_flags |= IFF_SUPP_NOFCS; -#ifdef CONFIG_IXGBE_VXLAN - switch (adapter->hw.mac.type) { - case ixgbe_mac_X550: - case ixgbe_mac_X550EM_x: - netdev->hw_enc_features |= NETIF_F_RXCSUM; - break; - default: - break; - } -#endif /* CONFIG_IXGBE_VXLAN */ - #ifdef CONFIG_IXGBE_DCB netdev->dcbnl_ops = &dcbnl_ops; #endif @@ -9114,6 +9314,8 @@ skip_sriov: goto err_sw_init; } + /* Set hw->mac.addr to permanent MAC address */ + ether_addr_copy(hw->mac.addr, hw->mac.perm_addr); ixgbe_mac_set_default_filter(adapter); setup_timer(&adapter->service_timer, &ixgbe_service_timer, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_model.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_model.h new file mode 100644 index 000000000..74c53ad9d --- /dev/null +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_model.h @@ -0,0 +1,103 @@ +/******************************************************************************* + * + * Intel 10 Gigabit PCI Express Linux drive + * Copyright(c) 2016 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program. If not, see <http://www.gnu.org/licenses/>. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Contact Information: + * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> + * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + * + ******************************************************************************/ + +#ifndef _IXGBE_MODEL_H_ +#define _IXGBE_MODEL_H_ + +#include "ixgbe.h" +#include "ixgbe_type.h" + +struct ixgbe_mat_field { + unsigned int off; + int (*val)(struct ixgbe_fdir_filter *input, + union ixgbe_atr_input *mask, + u32 val, u32 m); + unsigned int type; +}; + +static inline int ixgbe_mat_prgm_sip(struct ixgbe_fdir_filter *input, + union ixgbe_atr_input *mask, + u32 val, u32 m) +{ + input->filter.formatted.src_ip[0] = val; + mask->formatted.src_ip[0] = m; + return 0; +} + +static inline int ixgbe_mat_prgm_dip(struct ixgbe_fdir_filter *input, + union ixgbe_atr_input *mask, + u32 val, u32 m) +{ + input->filter.formatted.dst_ip[0] = val; + mask->formatted.dst_ip[0] = m; + return 0; +} + +static struct ixgbe_mat_field ixgbe_ipv4_fields[] = { + { .off = 12, .val = ixgbe_mat_prgm_sip, + .type = IXGBE_ATR_FLOW_TYPE_IPV4}, + { .off = 16, .val = ixgbe_mat_prgm_dip, + .type = IXGBE_ATR_FLOW_TYPE_IPV4}, + { .val = NULL } /* terminal node */ +}; + +static inline int ixgbe_mat_prgm_ports(struct ixgbe_fdir_filter *input, + union ixgbe_atr_input *mask, + u32 val, u32 m) +{ + input->filter.formatted.src_port = val & 0xffff; + mask->formatted.src_port = m & 0xffff; + input->filter.formatted.dst_port = val >> 16; + mask->formatted.dst_port = m >> 16; + + return 0; +}; + +static struct ixgbe_mat_field ixgbe_tcp_fields[] = { + {.off = 0, .val = ixgbe_mat_prgm_ports, + .type = IXGBE_ATR_FLOW_TYPE_TCPV4}, + { .val = NULL } /* terminal node */ +}; + +struct ixgbe_nexthdr { + /* offset, shift, and mask of position to next header */ + unsigned int o; + u32 s; + u32 m; + /* match criteria to make this jump*/ + unsigned int off; + u32 val; + u32 mask; + /* location of jump to make */ + struct ixgbe_mat_field *jump; +}; + +static struct ixgbe_nexthdr ixgbe_ipv4_jumps[] = { + { .o = 0, .s = 6, .m = 0xf, + .off = 8, .val = 0x600, .mask = 0xff00, .jump = ixgbe_tcp_fields}, + { .jump = NULL } /* terminal node */ +}; +#endif /* _IXGBE_MODEL_H_ */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index 87aca3f7c..68a9c6464 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -355,7 +355,7 @@ static s32 ixgbe_iosf_wait(struct ixgbe_hw *hw, u32 *ctrl) command = IXGBE_READ_REG(hw, IXGBE_SB_IOSF_INDIRECT_CTRL); if (!(command & IXGBE_SB_IOSF_CTRL_BUSY)) break; - usleep_range(10, 20); + udelay(10); } if (ctrl) *ctrl = command; diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c index c48aef613..d7aa4b203 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c +++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c @@ -680,7 +680,7 @@ static void ixgbevf_diag_test(struct net_device *netdev, if (if_running) /* indicate we're in test mode */ - dev_close(netdev); + ixgbevf_close(netdev); else ixgbevf_reset(adapter); @@ -692,7 +692,7 @@ static void ixgbevf_diag_test(struct net_device *netdev, clear_bit(__IXGBEVF_TESTING, &adapter->state); if (if_running) - dev_open(netdev); + ixgbevf_open(netdev); } else { hw_dbg(&adapter->hw, "online testing starting\n"); /* Online tests */ diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index 68ec7daa0..991eeae81 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -486,6 +486,8 @@ extern const struct ixgbe_mbx_operations ixgbevf_mbx_ops; extern const char ixgbevf_driver_name[]; extern const char ixgbevf_driver_version[]; +int ixgbevf_open(struct net_device *netdev); +int ixgbevf_close(struct net_device *netdev); void ixgbevf_up(struct ixgbevf_adapter *adapter); void ixgbevf_down(struct ixgbevf_adapter *adapter); void ixgbevf_reinit_locked(struct ixgbevf_adapter *adapter); diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 3558f019b..b0edae94d 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -837,7 +837,7 @@ add_tail_frag: /* Even if we own the page, we are not allowed to use atomic_set() * This would break get_page_unless_zero() users. */ - atomic_inc(&page->_count); + page_ref_inc(page); return true; } @@ -3122,7 +3122,7 @@ static void ixgbevf_free_all_rx_resources(struct ixgbevf_adapter *adapter) * handler is registered with the OS, the watchdog timer is started, * and the stack is notified that the interface is ready. **/ -static int ixgbevf_open(struct net_device *netdev) +int ixgbevf_open(struct net_device *netdev) { struct ixgbevf_adapter *adapter = netdev_priv(netdev); struct ixgbe_hw *hw = &adapter->hw; @@ -3205,7 +3205,7 @@ err_setup_reset: * needs to be disabled. A global MAC reset is issued to stop the * hardware, and all transmit and receive resources are freed. **/ -static int ixgbevf_close(struct net_device *netdev) +int ixgbevf_close(struct net_device *netdev) { struct ixgbevf_adapter *adapter = netdev_priv(netdev); @@ -3692,19 +3692,23 @@ static int ixgbevf_set_mac(struct net_device *netdev, void *p) struct ixgbevf_adapter *adapter = netdev_priv(netdev); struct ixgbe_hw *hw = &adapter->hw; struct sockaddr *addr = p; + int err; if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; - ether_addr_copy(netdev->dev_addr, addr->sa_data); - ether_addr_copy(hw->mac.addr, addr->sa_data); - spin_lock_bh(&adapter->mbx_lock); - hw->mac.ops.set_rar(hw, 0, hw->mac.addr, 0); + err = hw->mac.ops.set_rar(hw, 0, addr->sa_data, 0); spin_unlock_bh(&adapter->mbx_lock); + if (err) + return -EPERM; + + ether_addr_copy(hw->mac.addr, addr->sa_data); + ether_addr_copy(netdev->dev_addr, addr->sa_data); + return 0; } diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.c b/drivers/net/ethernet/intel/ixgbevf/vf.c index 61a98f4c5..4d613a4f2 100644 --- a/drivers/net/ethernet/intel/ixgbevf/vf.c +++ b/drivers/net/ethernet/intel/ixgbevf/vf.c @@ -408,8 +408,10 @@ static s32 ixgbevf_set_rar_vf(struct ixgbe_hw *hw, u32 index, u8 *addr, /* if nacked the address was rejected, use "perm_addr" */ if (!ret_val && - (msgbuf[0] == (IXGBE_VF_SET_MAC_ADDR | IXGBE_VT_MSGTYPE_NACK))) + (msgbuf[0] == (IXGBE_VF_SET_MAC_ADDR | IXGBE_VT_MSGTYPE_NACK))) { ixgbevf_get_mac_addr_vf(hw, hw->mac.addr); + return IXGBE_ERR_MBX; + } return ret_val; } -- cgit v1.2.3-54-g00ecf