From d635711daa98be86d4c7fd01499c34f566b54ccb Mon Sep 17 00:00:00 2001 From: André Fabian Silva Delgado Date: Fri, 10 Jun 2016 05:30:17 -0300 Subject: Linux-libre 4.6.2-gnu --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 878 +++++++++++++++------------- 1 file changed, 477 insertions(+), 401 deletions(-) (limited to 'drivers/net/ethernet/intel/i40e/i40e_txrx.c') diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 47bd8b314..6a49b7ae5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2014 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -610,15 +610,19 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring) /** * i40e_get_tx_pending - how many tx descriptors not processed * @tx_ring: the ring of descriptors + * @in_sw: is tx_pending being checked in SW or HW * * Since there is no access to the ring head register * in XL710, we need to use our local copies **/ -u32 i40e_get_tx_pending(struct i40e_ring *ring) +u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw) { u32 head, tail; - head = i40e_get_head(ring); + if (!in_sw) + head = i40e_get_head(ring); + else + head = ring->next_to_clean; tail = readl(ring->tail); if (head != tail) @@ -741,7 +745,7 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget) * them to be written back in case we stay in NAPI. * In this mode on X722 we do not enable Interrupt. */ - j = i40e_get_tx_pending(tx_ring); + j = i40e_get_tx_pending(tx_ring, false); if (budget && ((j / (WB_STRIDE + 1)) == 0) && (j != 0) && @@ -774,29 +778,48 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget) } /** - * i40e_force_wb - Arm hardware to do a wb on noncache aligned descriptors + * i40e_enable_wb_on_itr - Arm hardware to do a wb, interrupts are not enabled * @vsi: the VSI we care about - * @q_vector: the vector on which to force writeback + * @q_vector: the vector on which to enable writeback * **/ -void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector) +static void i40e_enable_wb_on_itr(struct i40e_vsi *vsi, + struct i40e_q_vector *q_vector) { u16 flags = q_vector->tx.ring[0].flags; + u32 val; - if (flags & I40E_TXR_FLAGS_WB_ON_ITR) { - u32 val; + if (!(flags & I40E_TXR_FLAGS_WB_ON_ITR)) + return; - if (q_vector->arm_wb_state) - return; + if (q_vector->arm_wb_state) + return; - val = I40E_PFINT_DYN_CTLN_WB_ON_ITR_MASK; + if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) { + val = I40E_PFINT_DYN_CTLN_WB_ON_ITR_MASK | + I40E_PFINT_DYN_CTLN_ITR_INDX_MASK; /* set noitr */ wr32(&vsi->back->hw, - I40E_PFINT_DYN_CTLN(q_vector->v_idx + - vsi->base_vector - 1), + I40E_PFINT_DYN_CTLN(q_vector->v_idx + vsi->base_vector - 1), val); - q_vector->arm_wb_state = true; - } else if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) { + } else { + val = I40E_PFINT_DYN_CTL0_WB_ON_ITR_MASK | + I40E_PFINT_DYN_CTL0_ITR_INDX_MASK; /* set noitr */ + + wr32(&vsi->back->hw, I40E_PFINT_DYN_CTL0, val); + } + q_vector->arm_wb_state = true; +} + +/** + * i40e_force_wb - Issue SW Interrupt so HW does a wb + * @vsi: the VSI we care about + * @q_vector: the vector on which to force writeback + * + **/ +void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector) +{ + if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) { u32 val = I40E_PFINT_DYN_CTLN_INTENA_MASK | I40E_PFINT_DYN_CTLN_ITR_INDX_MASK | /* set noitr */ I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK | @@ -1041,7 +1064,7 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring) if (rx_bi->page_dma) { dma_unmap_page(dev, rx_bi->page_dma, - PAGE_SIZE / 2, + PAGE_SIZE, DMA_FROM_DEVICE); rx_bi->page_dma = 0; } @@ -1176,16 +1199,19 @@ static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val) * i40e_alloc_rx_buffers_ps - Replace used receive buffers; packet split * @rx_ring: ring to place buffers on * @cleaned_count: number of buffers to replace + * + * Returns true if any errors on allocation **/ -void i40e_alloc_rx_buffers_ps(struct i40e_ring *rx_ring, u16 cleaned_count) +bool i40e_alloc_rx_buffers_ps(struct i40e_ring *rx_ring, u16 cleaned_count) { u16 i = rx_ring->next_to_use; union i40e_rx_desc *rx_desc; struct i40e_rx_buffer *bi; + const int current_node = numa_node_id(); /* do nothing if no valid netdev defined */ if (!rx_ring->netdev || !cleaned_count) - return; + return false; while (cleaned_count--) { rx_desc = I40E_RX_DESC(rx_ring, i); @@ -1193,56 +1219,79 @@ void i40e_alloc_rx_buffers_ps(struct i40e_ring *rx_ring, u16 cleaned_count) if (bi->skb) /* desc is in use */ goto no_buffers; + + /* If we've been moved to a different NUMA node, release the + * page so we can get a new one on the current node. + */ + if (bi->page && page_to_nid(bi->page) != current_node) { + dma_unmap_page(rx_ring->dev, + bi->page_dma, + PAGE_SIZE, + DMA_FROM_DEVICE); + __free_page(bi->page); + bi->page = NULL; + bi->page_dma = 0; + rx_ring->rx_stats.realloc_count++; + } else if (bi->page) { + rx_ring->rx_stats.page_reuse_count++; + } + if (!bi->page) { bi->page = alloc_page(GFP_ATOMIC); if (!bi->page) { rx_ring->rx_stats.alloc_page_failed++; goto no_buffers; } - } - - if (!bi->page_dma) { - /* use a half page if we're re-using */ - bi->page_offset ^= PAGE_SIZE / 2; bi->page_dma = dma_map_page(rx_ring->dev, bi->page, - bi->page_offset, - PAGE_SIZE / 2, + 0, + PAGE_SIZE, DMA_FROM_DEVICE); - if (dma_mapping_error(rx_ring->dev, - bi->page_dma)) { + if (dma_mapping_error(rx_ring->dev, bi->page_dma)) { rx_ring->rx_stats.alloc_page_failed++; + __free_page(bi->page); + bi->page = NULL; bi->page_dma = 0; + bi->page_offset = 0; goto no_buffers; } + bi->page_offset = 0; } - dma_sync_single_range_for_device(rx_ring->dev, - bi->dma, - 0, - rx_ring->rx_hdr_len, - DMA_FROM_DEVICE); /* Refresh the desc even if buffer_addrs didn't change * because each write-back erases this info. */ - rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma); + rx_desc->read.pkt_addr = + cpu_to_le64(bi->page_dma + bi->page_offset); rx_desc->read.hdr_addr = cpu_to_le64(bi->dma); i++; if (i == rx_ring->count) i = 0; } + if (rx_ring->next_to_use != i) + i40e_release_rx_desc(rx_ring, i); + + return false; + no_buffers: if (rx_ring->next_to_use != i) i40e_release_rx_desc(rx_ring, i); + + /* make sure to come back via polling to try again after + * allocation failure + */ + return true; } /** * i40e_alloc_rx_buffers_1buf - Replace used receive buffers; single buffer * @rx_ring: ring to place buffers on * @cleaned_count: number of buffers to replace + * + * Returns true if any errors on allocation **/ -void i40e_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count) +bool i40e_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count) { u16 i = rx_ring->next_to_use; union i40e_rx_desc *rx_desc; @@ -1251,7 +1300,7 @@ void i40e_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count) /* do nothing if no valid netdev defined */ if (!rx_ring->netdev || !cleaned_count) - return; + return false; while (cleaned_count--) { rx_desc = I40E_RX_DESC(rx_ring, i); @@ -1259,8 +1308,10 @@ void i40e_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count) skb = bi->skb; if (!skb) { - skb = netdev_alloc_skb_ip_align(rx_ring->netdev, - rx_ring->rx_buf_len); + skb = __netdev_alloc_skb_ip_align(rx_ring->netdev, + rx_ring->rx_buf_len, + GFP_ATOMIC | + __GFP_NOWARN); if (!skb) { rx_ring->rx_stats.alloc_buff_failed++; goto no_buffers; @@ -1278,6 +1329,8 @@ void i40e_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count) if (dma_mapping_error(rx_ring->dev, bi->dma)) { rx_ring->rx_stats.alloc_buff_failed++; bi->dma = 0; + dev_kfree_skb(bi->skb); + bi->skb = NULL; goto no_buffers; } } @@ -1289,9 +1342,19 @@ void i40e_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count) i = 0; } + if (rx_ring->next_to_use != i) + i40e_release_rx_desc(rx_ring, i); + + return false; + no_buffers: if (rx_ring->next_to_use != i) i40e_release_rx_desc(rx_ring, i); + + /* make sure to come back via polling to try again after + * allocation failure + */ + return true; } /** @@ -1326,16 +1389,7 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi, u16 rx_ptype) { struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(rx_ptype); - bool ipv4 = false, ipv6 = false; - bool ipv4_tunnel, ipv6_tunnel; - __wsum rx_udp_csum; - struct iphdr *iph; - __sum16 csum; - - ipv4_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT4_MAC_PAY3) && - (rx_ptype <= I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4); - ipv6_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT6_MAC_PAY3) && - (rx_ptype <= I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4); + bool ipv4, ipv6, ipv4_tunnel, ipv6_tunnel; skb->ip_summed = CHECKSUM_NONE; @@ -1351,12 +1405,10 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi, if (!(decoded.known && decoded.outer_ip)) return; - if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP && - decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) - ipv4 = true; - else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP && - decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) - ipv6 = true; + ipv4 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) && + (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4); + ipv6 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) && + (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6); if (ipv4 && (rx_error & (BIT(I40E_RX_DESC_ERROR_IPE_SHIFT) | @@ -1380,37 +1432,17 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi, if (rx_error & BIT(I40E_RX_DESC_ERROR_PPRS_SHIFT)) return; - /* If VXLAN/GENEVE traffic has an outer UDPv4 checksum we need to check - * it in the driver, hardware does not do it for us. - * Since L3L4P bit was set we assume a valid IHL value (>=5) - * so the total length of IPv4 header is IHL*4 bytes - * The UDP_0 bit *may* bet set if the *inner* header is UDP + /* The hardware supported by this driver does not validate outer + * checksums for tunneled VXLAN or GENEVE frames. I don't agree + * with it but the specification states that you "MAY validate", it + * doesn't make it a hard requirement so if we have validated the + * inner checksum report CHECKSUM_UNNECESSARY. */ - if (!(vsi->back->flags & I40E_FLAG_OUTER_UDP_CSUM_CAPABLE) && - (ipv4_tunnel)) { - skb->transport_header = skb->mac_header + - sizeof(struct ethhdr) + - (ip_hdr(skb)->ihl * 4); - - /* Add 4 bytes for VLAN tagged packets */ - skb->transport_header += (skb->protocol == htons(ETH_P_8021Q) || - skb->protocol == htons(ETH_P_8021AD)) - ? VLAN_HLEN : 0; - - if ((ip_hdr(skb)->protocol == IPPROTO_UDP) && - (udp_hdr(skb)->check != 0)) { - rx_udp_csum = udp_csum(skb); - iph = ip_hdr(skb); - csum = csum_tcpudp_magic( - iph->saddr, iph->daddr, - (skb->len - skb_transport_offset(skb)), - IPPROTO_UDP, rx_udp_csum); - - if (udp_hdr(skb)->check != csum) - goto checksum_fail; - - } /* else its GRE and so no outer UDP header */ - } + + ipv4_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT4_MAC_PAY3) && + (rx_ptype <= I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4); + ipv6_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT6_MAC_PAY3) && + (rx_ptype <= I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4); skb->ip_summed = CHECKSUM_UNNECESSARY; skb->csum_level = ipv4_tunnel || ipv6_tunnel; @@ -1475,18 +1507,19 @@ static inline void i40e_rx_hash(struct i40e_ring *ring, * * Returns true if there's any budget left (e.g. the clean is finished) **/ -static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) +static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, const int budget) { unsigned int total_rx_bytes = 0, total_rx_packets = 0; u16 rx_packet_len, rx_header_len, rx_sph, rx_hbo; u16 cleaned_count = I40E_DESC_UNUSED(rx_ring); - const int current_node = numa_mem_id(); struct i40e_vsi *vsi = rx_ring->vsi; u16 i = rx_ring->next_to_clean; union i40e_rx_desc *rx_desc; u32 rx_error, rx_status; + bool failure = false; u8 rx_ptype; u64 qword; + u32 copysize; if (budget <= 0) return 0; @@ -1497,7 +1530,9 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) u16 vlan_tag; /* return some buffers to hardware, one at a time is too slow */ if (cleaned_count >= I40E_RX_BUFFER_WRITE) { - i40e_alloc_rx_buffers_ps(rx_ring, cleaned_count); + failure = failure || + i40e_alloc_rx_buffers_ps(rx_ring, + cleaned_count); cleaned_count = 0; } @@ -1515,6 +1550,12 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) * DD bit is set. */ dma_rmb(); + /* sync header buffer for reading */ + dma_sync_single_range_for_cpu(rx_ring->dev, + rx_ring->rx_bi[0].dma, + i * rx_ring->rx_hdr_len, + rx_ring->rx_hdr_len, + DMA_FROM_DEVICE); if (i40e_rx_is_programming_status(qword)) { i40e_clean_programming_status(rx_ring, rx_desc); I40E_RX_INCREMENT(rx_ring, i); @@ -1523,10 +1564,13 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) rx_bi = &rx_ring->rx_bi[i]; skb = rx_bi->skb; if (likely(!skb)) { - skb = netdev_alloc_skb_ip_align(rx_ring->netdev, - rx_ring->rx_hdr_len); + skb = __netdev_alloc_skb_ip_align(rx_ring->netdev, + rx_ring->rx_hdr_len, + GFP_ATOMIC | + __GFP_NOWARN); if (!skb) { rx_ring->rx_stats.alloc_buff_failed++; + failure = true; break; } @@ -1534,8 +1578,8 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) skb_record_rx_queue(skb, rx_ring->queue_index); /* we are reusing so sync this buffer for CPU use */ dma_sync_single_range_for_cpu(rx_ring->dev, - rx_bi->dma, - 0, + rx_ring->rx_bi[0].dma, + i * rx_ring->rx_hdr_len, rx_ring->rx_hdr_len, DMA_FROM_DEVICE); } @@ -1553,9 +1597,16 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >> I40E_RXD_QW1_PTYPE_SHIFT; - prefetch(rx_bi->page); + /* sync half-page for reading */ + dma_sync_single_range_for_cpu(rx_ring->dev, + rx_bi->page_dma, + rx_bi->page_offset, + PAGE_SIZE / 2, + DMA_FROM_DEVICE); + prefetch(page_address(rx_bi->page) + rx_bi->page_offset); rx_bi->skb = NULL; cleaned_count++; + copysize = 0; if (rx_hbo || rx_sph) { int len; @@ -1566,38 +1617,50 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) memcpy(__skb_put(skb, len), rx_bi->hdr_buf, len); } else if (skb->len == 0) { int len; + unsigned char *va = page_address(rx_bi->page) + + rx_bi->page_offset; - len = (rx_packet_len > skb_headlen(skb) ? - skb_headlen(skb) : rx_packet_len); - memcpy(__skb_put(skb, len), - rx_bi->page + rx_bi->page_offset, - len); - rx_bi->page_offset += len; + len = min(rx_packet_len, rx_ring->rx_hdr_len); + memcpy(__skb_put(skb, len), va, len); + copysize = len; rx_packet_len -= len; } - /* Get the rest of the data if this was a header split */ if (rx_packet_len) { - skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, - rx_bi->page, - rx_bi->page_offset, - rx_packet_len); - - skb->len += rx_packet_len; - skb->data_len += rx_packet_len; - skb->truesize += rx_packet_len; - - if ((page_count(rx_bi->page) == 1) && - (page_to_nid(rx_bi->page) == current_node)) - get_page(rx_bi->page); - else + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, + rx_bi->page, + rx_bi->page_offset + copysize, + rx_packet_len, I40E_RXBUFFER_2048); + + /* If the page count is more than 2, then both halves + * of the page are used and we need to free it. Do it + * here instead of in the alloc code. Otherwise one + * of the half-pages might be released between now and + * then, and we wouldn't know which one to use. + * Don't call get_page and free_page since those are + * both expensive atomic operations that just change + * the refcount in opposite directions. Just give the + * page to the stack; he can have our refcount. + */ + if (page_count(rx_bi->page) > 2) { + dma_unmap_page(rx_ring->dev, + rx_bi->page_dma, + PAGE_SIZE, + DMA_FROM_DEVICE); rx_bi->page = NULL; + rx_bi->page_dma = 0; + rx_ring->rx_stats.realloc_count++; + } else { + get_page(rx_bi->page); + /* switch to the other half-page here; the + * allocation code programs the right addr + * into HW. If we haven't used this half-page, + * the address won't be changed, and HW can + * just use it next time through. + */ + rx_bi->page_offset ^= PAGE_SIZE / 2; + } - dma_unmap_page(rx_ring->dev, - rx_bi->page_dma, - PAGE_SIZE / 2, - DMA_FROM_DEVICE); - rx_bi->page_dma = 0; } I40E_RX_INCREMENT(rx_ring, i); @@ -1656,7 +1719,7 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) rx_ring->q_vector->rx.total_packets += total_rx_packets; rx_ring->q_vector->rx.total_bytes += total_rx_bytes; - return total_rx_packets; + return failure ? budget : total_rx_packets; } /** @@ -1674,6 +1737,7 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget) union i40e_rx_desc *rx_desc; u32 rx_error, rx_status; u16 rx_packet_len; + bool failure = false; u8 rx_ptype; u64 qword; u16 i; @@ -1684,7 +1748,9 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget) u16 vlan_tag; /* return some buffers to hardware, one at a time is too slow */ if (cleaned_count >= I40E_RX_BUFFER_WRITE) { - i40e_alloc_rx_buffers_1buf(rx_ring, cleaned_count); + failure = failure || + i40e_alloc_rx_buffers_1buf(rx_ring, + cleaned_count); cleaned_count = 0; } @@ -1783,7 +1849,7 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget) rx_ring->q_vector->rx.total_packets += total_rx_packets; rx_ring->q_vector->rx.total_bytes += total_rx_bytes; - return total_rx_packets; + return failure ? budget : total_rx_packets; } static u32 i40e_buildreg_itr(const int type, const u16 itr) @@ -1791,7 +1857,9 @@ static u32 i40e_buildreg_itr(const int type, const u16 itr) u32 val; val = I40E_PFINT_DYN_CTLN_INTENA_MASK | - I40E_PFINT_DYN_CTLN_CLEARPBA_MASK | + /* Don't clear PBA because that can cause lost interrupts that + * came in while we were cleaning/polling + */ (type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) | (itr << I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT); @@ -1814,6 +1882,7 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, bool rx = false, tx = false; u32 rxval, txval; int vector; + int idx = q_vector->v_idx; vector = (q_vector->v_idx + vsi->base_vector); @@ -1823,17 +1892,17 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0); if (q_vector->itr_countdown > 0 || - (!ITR_IS_DYNAMIC(vsi->rx_itr_setting) && - !ITR_IS_DYNAMIC(vsi->tx_itr_setting))) { + (!ITR_IS_DYNAMIC(vsi->rx_rings[idx]->rx_itr_setting) && + !ITR_IS_DYNAMIC(vsi->tx_rings[idx]->tx_itr_setting))) { goto enable_int; } - if (ITR_IS_DYNAMIC(vsi->rx_itr_setting)) { + if (ITR_IS_DYNAMIC(vsi->rx_rings[idx]->rx_itr_setting)) { rx = i40e_set_new_dynamic_itr(&q_vector->rx); rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr); } - if (ITR_IS_DYNAMIC(vsi->tx_itr_setting)) { + if (ITR_IS_DYNAMIC(vsi->tx_rings[idx]->tx_itr_setting)) { tx = i40e_set_new_dynamic_itr(&q_vector->tx); txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr); } @@ -1906,7 +1975,8 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) * budget and be more aggressive about cleaning up the Tx descriptors. */ i40e_for_each_ring(ring, q_vector->tx) { - clean_complete &= i40e_clean_tx_irq(ring, vsi->work_limit); + clean_complete = clean_complete && + i40e_clean_tx_irq(ring, vsi->work_limit); arm_wb = arm_wb || ring->arm_wb; ring->arm_wb = false; } @@ -1930,7 +2000,7 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) work_done += cleaned; /* if we didn't clean as many as budgeted, we must be done */ - clean_complete &= (budget_per_ring != cleaned); + clean_complete = clean_complete && (budget_per_ring > cleaned); } /* If work not completed, return budget and polling will return */ @@ -1938,7 +2008,7 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) tx_only: if (arm_wb) { q_vector->tx.ring[0].tx_stats.tx_force_wb++; - i40e_force_wb(vsi, q_vector); + i40e_enable_wb_on_itr(vsi, q_vector); } return budget; } @@ -1951,20 +2021,7 @@ tx_only: if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) { i40e_update_enable_itr(vsi, q_vector); } else { /* Legacy mode */ - struct i40e_hw *hw = &vsi->back->hw; - /* We re-enable the queue 0 cause, but - * don't worry about dynamic_enable - * because we left it on for the other - * possible interrupts during napi - */ - u32 qval = rd32(hw, I40E_QINT_RQCTL(0)) | - I40E_QINT_RQCTL_CAUSE_ENA_MASK; - - wr32(hw, I40E_QINT_RQCTL(0), qval); - qval = rd32(hw, I40E_QINT_TQCTL(0)) | - I40E_QINT_TQCTL_CAUSE_ENA_MASK; - wr32(hw, I40E_QINT_TQCTL(0), qval); - i40e_irq_dynamic_enable_icr0(vsi->back); + i40e_irq_dynamic_enable_icr0(vsi->back, false); } return 0; } @@ -1974,10 +2031,9 @@ tx_only: * @tx_ring: ring to add programming descriptor to * @skb: send buffer * @tx_flags: send tx flags - * @protocol: wire protocol **/ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, - u32 tx_flags, __be16 protocol) + u32 tx_flags) { struct i40e_filter_program_desc *fdir_desc; struct i40e_pf *pf = tx_ring->vsi->back; @@ -1989,6 +2045,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, struct tcphdr *th; unsigned int hlen; u32 flex_ptype, dtype_cmd; + int l4_proto; u16 i; /* make sure ATR is enabled */ @@ -2002,36 +2059,28 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, if (!tx_ring->atr_sample_rate) return; + /* Currently only IPv4/IPv6 with TCP is supported */ if (!(tx_flags & (I40E_TX_FLAGS_IPV4 | I40E_TX_FLAGS_IPV6))) return; - if (!(tx_flags & I40E_TX_FLAGS_UDP_TUNNEL)) { - /* snag network header to get L4 type and address */ - hdr.network = skb_network_header(skb); + /* snag network header to get L4 type and address */ + hdr.network = (tx_flags & I40E_TX_FLAGS_UDP_TUNNEL) ? + skb_inner_network_header(skb) : skb_network_header(skb); - /* Currently only IPv4/IPv6 with TCP is supported - * access ihl as u8 to avoid unaligned access on ia64 - */ - if (tx_flags & I40E_TX_FLAGS_IPV4) - hlen = (hdr.network[0] & 0x0F) << 2; - else if (protocol == htons(ETH_P_IPV6)) - hlen = sizeof(struct ipv6hdr); - else - return; + /* Note: tx_flags gets modified to reflect inner protocols in + * tx_enable_csum function if encap is enabled. + */ + if (tx_flags & I40E_TX_FLAGS_IPV4) { + /* access ihl as u8 to avoid unaligned access on ia64 */ + hlen = (hdr.network[0] & 0x0F) << 2; + l4_proto = hdr.ipv4->protocol; } else { - hdr.network = skb_inner_network_header(skb); - hlen = skb_inner_network_header_len(skb); + hlen = hdr.network - skb->data; + l4_proto = ipv6_find_hdr(skb, &hlen, IPPROTO_TCP, NULL, NULL); + hlen -= hdr.network - skb->data; } - /* Currently only IPv4/IPv6 with TCP is supported - * Note: tx_flags gets modified to reflect inner protocols in - * tx_enable_csum function if encap is enabled. - */ - if ((tx_flags & I40E_TX_FLAGS_IPV4) && - (hdr.ipv4->protocol != IPPROTO_TCP)) - return; - else if ((tx_flags & I40E_TX_FLAGS_IPV6) && - (hdr.ipv6->nexthdr != IPPROTO_TCP)) + if (l4_proto != IPPROTO_TCP) return; th = (struct tcphdr *)(hdr.network + hlen); @@ -2039,7 +2088,8 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, /* Due to lack of space, no more new filters can be programmed */ if (th->syn && (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED)) return; - if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) { + if ((pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) && + (!(pf->auto_disable_flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE))) { /* HW ATR eviction will take care of removing filters on FIN * and RST packets. */ @@ -2067,7 +2117,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, flex_ptype = (tx_ring->queue_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) & I40E_TXD_FLTR_QW0_QINDEX_MASK; - flex_ptype |= (protocol == htons(ETH_P_IP)) ? + flex_ptype |= (tx_flags & I40E_TX_FLAGS_IPV4) ? (I40E_FILTER_PCTYPE_NONF_IPV4_TCP << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) : (I40E_FILTER_PCTYPE_NONF_IPV6_TCP << @@ -2101,7 +2151,8 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) & I40E_TXD_FLTR_QW1_CNTINDEX_MASK; - if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) + if ((pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) && + (!(pf->auto_disable_flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE))) dtype_cmd |= I40E_TXD_FLTR_QW1_ATR_MASK; fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype); @@ -2206,13 +2257,23 @@ out: static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb, u8 *hdr_len, u64 *cd_type_cmd_tso_mss) { - u32 cd_cmd, cd_tso_len, cd_mss; - struct ipv6hdr *ipv6h; - struct tcphdr *tcph; - struct iphdr *iph; - u32 l4len; + u64 cd_cmd, cd_tso_len, cd_mss; + union { + struct iphdr *v4; + struct ipv6hdr *v6; + unsigned char *hdr; + } ip; + union { + struct tcphdr *tcp; + struct udphdr *udp; + unsigned char *hdr; + } l4; + u32 paylen, l4_offset; int err; + if (skb->ip_summed != CHECKSUM_PARTIAL) + return 0; + if (!skb_is_gso(skb)) return 0; @@ -2220,35 +2281,60 @@ static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb, if (err < 0) return err; - iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb); - ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb); - - if (iph->version == 4) { - tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb); - iph->tot_len = 0; - iph->check = 0; - tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, - 0, IPPROTO_TCP, 0); - } else if (ipv6h->version == 6) { - tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb); - ipv6h->payload_len = 0; - tcph->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, - 0, IPPROTO_TCP, 0); + ip.hdr = skb_network_header(skb); + l4.hdr = skb_transport_header(skb); + + /* initialize outer IP header fields */ + if (ip.v4->version == 4) { + ip.v4->tot_len = 0; + ip.v4->check = 0; + } else { + ip.v6->payload_len = 0; } - l4len = skb->encapsulation ? inner_tcp_hdrlen(skb) : tcp_hdrlen(skb); - *hdr_len = (skb->encapsulation - ? (skb_inner_transport_header(skb) - skb->data) - : skb_transport_offset(skb)) + l4len; + if (skb_shinfo(skb)->gso_type & (SKB_GSO_UDP_TUNNEL | SKB_GSO_GRE | + SKB_GSO_UDP_TUNNEL_CSUM)) { + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM) { + /* determine offset of outer transport header */ + l4_offset = l4.hdr - skb->data; + + /* remove payload length from outer checksum */ + paylen = (__force u16)l4.udp->check; + paylen += ntohs(1) * (u16)~(skb->len - l4_offset); + l4.udp->check = ~csum_fold((__force __wsum)paylen); + } + + /* reset pointers to inner headers */ + ip.hdr = skb_inner_network_header(skb); + l4.hdr = skb_inner_transport_header(skb); + + /* initialize inner IP header fields */ + if (ip.v4->version == 4) { + ip.v4->tot_len = 0; + ip.v4->check = 0; + } else { + ip.v6->payload_len = 0; + } + } + + /* determine offset of inner transport header */ + l4_offset = l4.hdr - skb->data; + + /* remove payload length from inner checksum */ + paylen = (__force u16)l4.tcp->check; + paylen += ntohs(1) * (u16)~(skb->len - l4_offset); + l4.tcp->check = ~csum_fold((__force __wsum)paylen); + + /* compute length of segmentation header */ + *hdr_len = (l4.tcp->doff * 4) + l4_offset; /* find the field values */ cd_cmd = I40E_TX_CTX_DESC_TSO; cd_tso_len = skb->len - *hdr_len; cd_mss = skb_shinfo(skb)->gso_size; - *cd_type_cmd_tso_mss |= ((u64)cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) | - ((u64)cd_tso_len << - I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) | - ((u64)cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT); + *cd_type_cmd_tso_mss |= (cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) | + (cd_tso_len << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) | + (cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT); return 1; } @@ -2303,129 +2389,154 @@ static int i40e_tsyn(struct i40e_ring *tx_ring, struct sk_buff *skb, * @tx_ring: Tx descriptor ring * @cd_tunneling: ptr to context desc bits **/ -static void i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags, - u32 *td_cmd, u32 *td_offset, - struct i40e_ring *tx_ring, - u32 *cd_tunneling) +static int i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags, + u32 *td_cmd, u32 *td_offset, + struct i40e_ring *tx_ring, + u32 *cd_tunneling) { - struct ipv6hdr *this_ipv6_hdr; - unsigned int this_tcp_hdrlen; - struct iphdr *this_ip_hdr; - u32 network_hdr_len; - u8 l4_hdr = 0; - struct udphdr *oudph = NULL; - struct iphdr *oiph = NULL; - u32 l4_tunnel = 0; + union { + struct iphdr *v4; + struct ipv6hdr *v6; + unsigned char *hdr; + } ip; + union { + struct tcphdr *tcp; + struct udphdr *udp; + unsigned char *hdr; + } l4; + unsigned char *exthdr; + u32 offset, cmd = 0, tunnel = 0; + __be16 frag_off; + u8 l4_proto = 0; + + if (skb->ip_summed != CHECKSUM_PARTIAL) + return 0; + + ip.hdr = skb_network_header(skb); + l4.hdr = skb_transport_header(skb); + + /* compute outer L2 header size */ + offset = ((ip.hdr - skb->data) / 2) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT; if (skb->encapsulation) { - switch (ip_hdr(skb)->protocol) { + /* define outer network header type */ + if (*tx_flags & I40E_TX_FLAGS_IPV4) { + tunnel |= (*tx_flags & I40E_TX_FLAGS_TSO) ? + I40E_TX_CTX_EXT_IP_IPV4 : + I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM; + + l4_proto = ip.v4->protocol; + } else if (*tx_flags & I40E_TX_FLAGS_IPV6) { + tunnel |= I40E_TX_CTX_EXT_IP_IPV6; + + exthdr = ip.hdr + sizeof(*ip.v6); + l4_proto = ip.v6->nexthdr; + if (l4.hdr != exthdr) + ipv6_skip_exthdr(skb, exthdr - skb->data, + &l4_proto, &frag_off); + } + + /* compute outer L3 header size */ + tunnel |= ((l4.hdr - ip.hdr) / 4) << + I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT; + + /* switch IP header pointer from outer to inner header */ + ip.hdr = skb_inner_network_header(skb); + + /* define outer transport */ + switch (l4_proto) { case IPPROTO_UDP: - oudph = udp_hdr(skb); - oiph = ip_hdr(skb); - l4_tunnel = I40E_TXD_CTX_UDP_TUNNELING; + tunnel |= I40E_TXD_CTX_UDP_TUNNELING; *tx_flags |= I40E_TX_FLAGS_UDP_TUNNEL; break; case IPPROTO_GRE: - l4_tunnel = I40E_TXD_CTX_GRE_TUNNELING; + tunnel |= I40E_TXD_CTX_GRE_TUNNELING; + *tx_flags |= I40E_TX_FLAGS_UDP_TUNNEL; break; default: - return; - } - network_hdr_len = skb_inner_network_header_len(skb); - this_ip_hdr = inner_ip_hdr(skb); - this_ipv6_hdr = inner_ipv6_hdr(skb); - this_tcp_hdrlen = inner_tcp_hdrlen(skb); - - if (*tx_flags & I40E_TX_FLAGS_IPV4) { - if (*tx_flags & I40E_TX_FLAGS_TSO) { - *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4; - ip_hdr(skb)->check = 0; - } else { - *cd_tunneling |= - I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM; - } - } else if (*tx_flags & I40E_TX_FLAGS_IPV6) { - *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6; if (*tx_flags & I40E_TX_FLAGS_TSO) - ip_hdr(skb)->check = 0; + return -1; + + skb_checksum_help(skb); + return 0; } - /* Now set the ctx descriptor fields */ - *cd_tunneling |= (skb_network_header_len(skb) >> 2) << - I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT | - l4_tunnel | - ((skb_inner_network_offset(skb) - - skb_transport_offset(skb)) >> 1) << - I40E_TXD_CTX_QW0_NATLEN_SHIFT; - if (this_ip_hdr->version == 6) { - *tx_flags &= ~I40E_TX_FLAGS_IPV4; + /* compute tunnel header size */ + tunnel |= ((ip.hdr - l4.hdr) / 2) << + I40E_TXD_CTX_QW0_NATLEN_SHIFT; + + /* indicate if we need to offload outer UDP header */ + if ((*tx_flags & I40E_TX_FLAGS_TSO) && + (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM)) + tunnel |= I40E_TXD_CTX_QW0_L4T_CS_MASK; + + /* record tunnel offload values */ + *cd_tunneling |= tunnel; + + /* switch L4 header pointer from outer to inner */ + l4.hdr = skb_inner_transport_header(skb); + l4_proto = 0; + + /* reset type as we transition from outer to inner headers */ + *tx_flags &= ~(I40E_TX_FLAGS_IPV4 | I40E_TX_FLAGS_IPV6); + if (ip.v4->version == 4) + *tx_flags |= I40E_TX_FLAGS_IPV4; + if (ip.v6->version == 6) *tx_flags |= I40E_TX_FLAGS_IPV6; - } - if ((tx_ring->flags & I40E_TXR_FLAGS_OUTER_UDP_CSUM) && - (l4_tunnel == I40E_TXD_CTX_UDP_TUNNELING) && - (*cd_tunneling & I40E_TXD_CTX_QW0_EXT_IP_MASK)) { - oudph->check = ~csum_tcpudp_magic(oiph->saddr, - oiph->daddr, - (skb->len - skb_transport_offset(skb)), - IPPROTO_UDP, 0); - *cd_tunneling |= I40E_TXD_CTX_QW0_L4T_CS_MASK; - } - } else { - network_hdr_len = skb_network_header_len(skb); - this_ip_hdr = ip_hdr(skb); - this_ipv6_hdr = ipv6_hdr(skb); - this_tcp_hdrlen = tcp_hdrlen(skb); } /* Enable IP checksum offloads */ if (*tx_flags & I40E_TX_FLAGS_IPV4) { - l4_hdr = this_ip_hdr->protocol; + l4_proto = ip.v4->protocol; /* the stack computes the IP header already, the only time we * need the hardware to recompute it is in the case of TSO. */ - if (*tx_flags & I40E_TX_FLAGS_TSO) { - *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM; - this_ip_hdr->check = 0; - } else { - *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4; - } - /* Now set the td_offset for IP header length */ - *td_offset = (network_hdr_len >> 2) << - I40E_TX_DESC_LENGTH_IPLEN_SHIFT; + cmd |= (*tx_flags & I40E_TX_FLAGS_TSO) ? + I40E_TX_DESC_CMD_IIPT_IPV4_CSUM : + I40E_TX_DESC_CMD_IIPT_IPV4; } else if (*tx_flags & I40E_TX_FLAGS_IPV6) { - l4_hdr = this_ipv6_hdr->nexthdr; - *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV6; - /* Now set the td_offset for IP header length */ - *td_offset = (network_hdr_len >> 2) << - I40E_TX_DESC_LENGTH_IPLEN_SHIFT; + cmd |= I40E_TX_DESC_CMD_IIPT_IPV6; + + exthdr = ip.hdr + sizeof(*ip.v6); + l4_proto = ip.v6->nexthdr; + if (l4.hdr != exthdr) + ipv6_skip_exthdr(skb, exthdr - skb->data, + &l4_proto, &frag_off); } - /* words in MACLEN + dwords in IPLEN + dwords in L4Len */ - *td_offset |= (skb_network_offset(skb) >> 1) << - I40E_TX_DESC_LENGTH_MACLEN_SHIFT; + + /* compute inner L3 header size */ + offset |= ((l4.hdr - ip.hdr) / 4) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT; /* Enable L4 checksum offloads */ - switch (l4_hdr) { + switch (l4_proto) { case IPPROTO_TCP: /* enable checksum offloads */ - *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP; - *td_offset |= (this_tcp_hdrlen >> 2) << - I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; + cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP; + offset |= l4.tcp->doff << I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; break; case IPPROTO_SCTP: /* enable SCTP checksum offload */ - *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP; - *td_offset |= (sizeof(struct sctphdr) >> 2) << - I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; + cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP; + offset |= (sizeof(struct sctphdr) >> 2) << + I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; break; case IPPROTO_UDP: /* enable UDP checksum offload */ - *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP; - *td_offset |= (sizeof(struct udphdr) >> 2) << - I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; + cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP; + offset |= (sizeof(struct udphdr) >> 2) << + I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; break; default: - break; + if (*tx_flags & I40E_TX_FLAGS_TSO) + return -1; + skb_checksum_help(skb); + return 0; } + + *td_cmd |= cmd; + *td_offset |= offset; + + return 1; } /** @@ -2466,7 +2577,7 @@ static void i40e_create_tx_ctx(struct i40e_ring *tx_ring, * * Returns -EBUSY if a stop is needed, else 0 **/ -static inline int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size) +int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size) { netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index); /* Memory barrier before checking head and tail */ @@ -2483,77 +2594,70 @@ static inline int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size) } /** - * i40e_maybe_stop_tx - 1st level check for tx stop conditions - * @tx_ring: the ring to be checked - * @size: the size buffer we want to assure is available - * - * Returns 0 if stop is not needed - **/ -#ifdef I40E_FCOE -inline int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size) -#else -static inline int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size) -#endif -{ - if (likely(I40E_DESC_UNUSED(tx_ring) >= size)) - return 0; - return __i40e_maybe_stop_tx(tx_ring, size); -} - -/** - * i40e_chk_linearize - Check if there are more than 8 fragments per packet + * __i40e_chk_linearize - Check if there are more than 8 buffers per packet * @skb: send buffer - * @tx_flags: collected send information * - * Note: Our HW can't scatter-gather more than 8 fragments to build - * a packet on the wire and so we need to figure out the cases where we - * need to linearize the skb. + * Note: Our HW can't DMA more than 8 buffers to build a packet on the wire + * and so we need to figure out the cases where we need to linearize the skb. + * + * For TSO we need to count the TSO header and segment payload separately. + * As such we need to check cases where we have 7 fragments or more as we + * can potentially require 9 DMA transactions, 1 for the TSO header, 1 for + * the segment payload in the first descriptor, and another 7 for the + * fragments. **/ -static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags) +bool __i40e_chk_linearize(struct sk_buff *skb) { - struct skb_frag_struct *frag; - bool linearize = false; - unsigned int size = 0; - u16 num_frags; - u16 gso_segs; + const struct skb_frag_struct *frag, *stale; + int nr_frags, sum; - num_frags = skb_shinfo(skb)->nr_frags; - gso_segs = skb_shinfo(skb)->gso_segs; + /* no need to check if number of frags is less than 7 */ + nr_frags = skb_shinfo(skb)->nr_frags; + if (nr_frags < (I40E_MAX_BUFFER_TXD - 1)) + return false; - if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO)) { - u16 j = 0; + /* We need to walk through the list and validate that each group + * of 6 fragments totals at least gso_size. However we don't need + * to perform such validation on the last 6 since the last 6 cannot + * inherit any data from a descriptor after them. + */ + nr_frags -= I40E_MAX_BUFFER_TXD - 2; + frag = &skb_shinfo(skb)->frags[0]; + + /* Initialize size to the negative value of gso_size minus 1. We + * use this as the worst case scenerio in which the frag ahead + * of us only provides one byte which is why we are limited to 6 + * descriptors for a single transmit as the header and previous + * fragment are already consuming 2 descriptors. + */ + sum = 1 - skb_shinfo(skb)->gso_size; - if (num_frags < (I40E_MAX_BUFFER_TXD)) - goto linearize_chk_done; - /* try the simple math, if we have too many frags per segment */ - if (DIV_ROUND_UP((num_frags + gso_segs), gso_segs) > - I40E_MAX_BUFFER_TXD) { - linearize = true; - goto linearize_chk_done; - } - frag = &skb_shinfo(skb)->frags[0]; - /* we might still have more fragments per segment */ - do { - size += skb_frag_size(frag); - frag++; j++; - if ((size >= skb_shinfo(skb)->gso_size) && - (j < I40E_MAX_BUFFER_TXD)) { - size = (size % skb_shinfo(skb)->gso_size); - j = (size) ? 1 : 0; - } - if (j == I40E_MAX_BUFFER_TXD) { - linearize = true; - break; - } - num_frags--; - } while (num_frags); - } else { - if (num_frags >= I40E_MAX_BUFFER_TXD) - linearize = true; + /* Add size of frags 0 through 4 to create our initial sum */ + sum += skb_frag_size(frag++); + sum += skb_frag_size(frag++); + sum += skb_frag_size(frag++); + sum += skb_frag_size(frag++); + sum += skb_frag_size(frag++); + + /* Walk through fragments adding latest fragment, testing it, and + * then removing stale fragments from the sum. + */ + stale = &skb_shinfo(skb)->frags[0]; + for (;;) { + sum += skb_frag_size(frag++); + + /* if sum is negative we failed to make sufficient progress */ + if (sum < 0) + return true; + + /* use pre-decrement to avoid processing last fragment */ + if (!--nr_frags) + break; + + sum -= skb_frag_size(stale++); } -linearize_chk_done: - return linearize; + return false; } /** @@ -2759,43 +2863,6 @@ dma_error: tx_ring->next_to_use = i; } -/** - * i40e_xmit_descriptor_count - calculate number of tx descriptors needed - * @skb: send buffer - * @tx_ring: ring to send buffer on - * - * Returns number of data descriptors needed for this skb. Returns 0 to indicate - * there is not enough descriptors available in this ring since we need at least - * one descriptor. - **/ -#ifdef I40E_FCOE -inline int i40e_xmit_descriptor_count(struct sk_buff *skb, - struct i40e_ring *tx_ring) -#else -static inline int i40e_xmit_descriptor_count(struct sk_buff *skb, - struct i40e_ring *tx_ring) -#endif -{ - unsigned int f; - int count = 0; - - /* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD, - * + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD, - * + 4 desc gap to avoid the cache line where head is, - * + 1 desc for context descriptor, - * otherwise try next time - */ - for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) - count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size); - - count += TXD_USE_COUNT(skb_headlen(skb)); - if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) { - tx_ring->tx_stats.tx_busy++; - return 0; - } - return count; -} - /** * i40e_xmit_frame_ring - Sends buffer on Tx ring * @skb: send buffer @@ -2814,14 +2881,30 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, __be16 protocol; u32 td_cmd = 0; u8 hdr_len = 0; + int tso, count; int tsyn; - int tso; /* prefetch the data, we'll need it later */ prefetch(skb->data); - if (0 == i40e_xmit_descriptor_count(skb, tx_ring)) + count = i40e_xmit_descriptor_count(skb); + if (i40e_chk_linearize(skb, count)) { + if (__skb_linearize(skb)) + goto out_drop; + count = TXD_USE_COUNT(skb->len); + tx_ring->tx_stats.tx_linearize++; + } + + /* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD, + * + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD, + * + 4 desc gap to avoid the cache line where head is, + * + 1 desc for context descriptor, + * otherwise try next time + */ + if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) { + tx_ring->tx_stats.tx_busy++; return NETDEV_TX_BUSY; + } /* prepare the xmit flags */ if (i40e_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags)) @@ -2846,29 +2929,22 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, else if (tso) tx_flags |= I40E_TX_FLAGS_TSO; + /* Always offload the checksum, since it's in the data descriptor */ + tso = i40e_tx_enable_csum(skb, &tx_flags, &td_cmd, &td_offset, + tx_ring, &cd_tunneling); + if (tso < 0) + goto out_drop; + tsyn = i40e_tsyn(tx_ring, skb, tx_flags, &cd_type_cmd_tso_mss); if (tsyn) tx_flags |= I40E_TX_FLAGS_TSYN; - if (i40e_chk_linearize(skb, tx_flags)) { - if (skb_linearize(skb)) - goto out_drop; - tx_ring->tx_stats.tx_linearize++; - } skb_tx_timestamp(skb); /* always enable CRC insertion offload */ td_cmd |= I40E_TX_DESC_CMD_ICRC; - /* Always offload the checksum, since it's in the data descriptor */ - if (skb->ip_summed == CHECKSUM_PARTIAL) { - tx_flags |= I40E_TX_FLAGS_CSUM; - - i40e_tx_enable_csum(skb, &tx_flags, &td_cmd, &td_offset, - tx_ring, &cd_tunneling); - } - i40e_create_tx_ctx(tx_ring, cd_type_cmd_tso_mss, cd_tunneling, cd_l2tag2); @@ -2876,7 +2952,7 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, * * NOTE: this must always be directly before the data descriptor. */ - i40e_atr(tx_ring, skb, tx_flags, protocol); + i40e_atr(tx_ring, skb, tx_flags); i40e_tx_map(tx_ring, skb, first, tx_flags, hdr_len, td_cmd, td_offset); -- cgit v1.2.3-54-g00ecf