summaryrefslogtreecommitdiff
path: root/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/ethernet/intel/i40evf/i40e_txrx.c')
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_txrx.c199
1 files changed, 133 insertions, 66 deletions
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
index 395f32f226c0..7e91d825c760 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
@@ -366,15 +366,32 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
**/
static void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
{
- u32 val = I40E_VFINT_DYN_CTLN_INTENA_MASK |
- I40E_VFINT_DYN_CTLN1_ITR_INDX_MASK | /* set noitr */
- I40E_VFINT_DYN_CTLN_SWINT_TRIG_MASK |
- I40E_VFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK;
- /* allow 00 to be written to the index */
-
- wr32(&vsi->back->hw,
- I40E_VFINT_DYN_CTLN1(q_vector->v_idx + vsi->base_vector - 1),
- val);
+ u16 flags = q_vector->tx.ring[0].flags;
+
+ if (flags & I40E_TXR_FLAGS_WB_ON_ITR) {
+ u32 val;
+
+ if (q_vector->arm_wb_state)
+ return;
+
+ val = I40E_VFINT_DYN_CTLN1_WB_ON_ITR_MASK;
+
+ wr32(&vsi->back->hw,
+ I40E_VFINT_DYN_CTLN1(q_vector->v_idx +
+ vsi->base_vector - 1),
+ val);
+ q_vector->arm_wb_state = true;
+ } else {
+ u32 val = I40E_VFINT_DYN_CTLN1_INTENA_MASK |
+ I40E_VFINT_DYN_CTLN1_ITR_INDX_MASK | /* set noitr */
+ I40E_VFINT_DYN_CTLN1_SWINT_TRIG_MASK |
+ I40E_VFINT_DYN_CTLN1_SW_ITR_INDX_ENA_MASK;
+ /* allow 00 to be written to the index */
+
+ wr32(&vsi->back->hw,
+ I40E_VFINT_DYN_CTLN1(q_vector->v_idx +
+ vsi->base_vector - 1), val);
+ }
}
/**
@@ -404,7 +421,7 @@ static void i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
* 20-1249MB/s bulk (8000 ints/s)
*/
bytes_per_int = rc->total_bytes / rc->itr;
- switch (rc->itr) {
+ switch (new_latency_range) {
case I40E_LOWEST_LATENCY:
if (bytes_per_int > 10)
new_latency_range = I40E_LOW_LATENCY;
@@ -417,9 +434,14 @@ static void i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
break;
case I40E_BULK_LATENCY:
if (bytes_per_int <= 20)
- rc->latency_range = I40E_LOW_LATENCY;
+ new_latency_range = I40E_LOW_LATENCY;
+ break;
+ default:
+ if (bytes_per_int <= 20)
+ new_latency_range = I40E_LOW_LATENCY;
break;
}
+ rc->latency_range = new_latency_range;
switch (new_latency_range) {
case I40E_LOWEST_LATENCY:
@@ -435,42 +457,14 @@ static void i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
break;
}
- if (new_itr != rc->itr) {
- /* do an exponential smoothing */
- new_itr = (10 * new_itr * rc->itr) /
- ((9 * new_itr) + rc->itr);
- rc->itr = new_itr & I40E_MAX_ITR;
- }
+ if (new_itr != rc->itr)
+ rc->itr = new_itr;
rc->total_bytes = 0;
rc->total_packets = 0;
}
-/**
- * i40e_update_dynamic_itr - Adjust ITR based on bytes per int
- * @q_vector: the vector to adjust
- **/
-static void i40e_update_dynamic_itr(struct i40e_q_vector *q_vector)
-{
- u16 vector = q_vector->vsi->base_vector + q_vector->v_idx;
- struct i40e_hw *hw = &q_vector->vsi->back->hw;
- u32 reg_addr;
- u16 old_itr;
-
- reg_addr = I40E_VFINT_ITRN1(I40E_RX_ITR, vector - 1);
- old_itr = q_vector->rx.itr;
- i40e_set_new_dynamic_itr(&q_vector->rx);
- if (old_itr != q_vector->rx.itr)
- wr32(hw, reg_addr, q_vector->rx.itr);
-
- reg_addr = I40E_VFINT_ITRN1(I40E_TX_ITR, vector - 1);
- old_itr = q_vector->tx.itr;
- i40e_set_new_dynamic_itr(&q_vector->tx);
- if (old_itr != q_vector->tx.itr)
- wr32(hw, reg_addr, q_vector->tx.itr);
-}
-
-/**
+/*
* i40evf_setup_tx_descriptors - Allocate the Tx descriptors
* @tx_ring: the tx ring to set up
*
@@ -873,7 +867,7 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
return;
/* did the hardware decode the packet and checksum? */
- if (!(rx_status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)))
+ if (!(rx_status & BIT(I40E_RX_DESC_STATUS_L3L4P_SHIFT)))
return;
/* both known and outer_ip must be set for the below code to work */
@@ -888,25 +882,25 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
ipv6 = true;
if (ipv4 &&
- (rx_error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) |
- (1 << I40E_RX_DESC_ERROR_EIPE_SHIFT))))
+ (rx_error & (BIT(I40E_RX_DESC_ERROR_IPE_SHIFT) |
+ BIT(I40E_RX_DESC_ERROR_EIPE_SHIFT))))
goto checksum_fail;
/* likely incorrect csum if alternate IP extension headers found */
if (ipv6 &&
- rx_status & (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT))
+ rx_status & BIT(I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT))
/* don't increment checksum err here, non-fatal err */
return;
/* there was some L4 error, count error and punt packet to the stack */
- if (rx_error & (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))
+ if (rx_error & BIT(I40E_RX_DESC_ERROR_L4E_SHIFT))
goto checksum_fail;
/* handle packets that were not able to be checksummed due
* to arrival speed, in this case the stack can compute
* the csum.
*/
- if (rx_error & (1 << I40E_RX_DESC_ERROR_PPRS_SHIFT))
+ if (rx_error & BIT(I40E_RX_DESC_ERROR_PPRS_SHIFT))
return;
/* If VXLAN traffic has an outer UDPv4 checksum we need to check
@@ -1027,7 +1021,7 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget)
rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
I40E_RXD_QW1_STATUS_SHIFT;
- if (!(rx_status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)))
+ if (!(rx_status & BIT(I40E_RX_DESC_STATUS_DD_SHIFT)))
break;
/* This memory barrier is needed to keep us from reading
@@ -1063,8 +1057,8 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget)
rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
I40E_RXD_QW1_ERROR_SHIFT;
- rx_hbo = rx_error & (1 << I40E_RX_DESC_ERROR_HBO_SHIFT);
- rx_error &= ~(1 << I40E_RX_DESC_ERROR_HBO_SHIFT);
+ rx_hbo = rx_error & BIT(I40E_RX_DESC_ERROR_HBO_SHIFT);
+ rx_error &= ~BIT(I40E_RX_DESC_ERROR_HBO_SHIFT);
rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
I40E_RXD_QW1_PTYPE_SHIFT;
@@ -1116,7 +1110,7 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget)
I40E_RX_INCREMENT(rx_ring, i);
if (unlikely(
- !(rx_status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT)))) {
+ !(rx_status & BIT(I40E_RX_DESC_STATUS_EOF_SHIFT)))) {
struct i40e_rx_buffer *next_buffer;
next_buffer = &rx_ring->rx_bi[i];
@@ -1126,7 +1120,7 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget)
}
/* ERR_MASK will only have valid bits if EOP set */
- if (unlikely(rx_error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
+ if (unlikely(rx_error & BIT(I40E_RX_DESC_ERROR_RXE_SHIFT))) {
dev_kfree_skb_any(skb);
continue;
}
@@ -1141,7 +1135,7 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget)
i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype);
- vlan_tag = rx_status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)
+ vlan_tag = rx_status & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)
? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1)
: 0;
#ifdef I40E_FCOE
@@ -1202,7 +1196,7 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget)
rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
I40E_RXD_QW1_STATUS_SHIFT;
- if (!(rx_status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)))
+ if (!(rx_status & BIT(I40E_RX_DESC_STATUS_DD_SHIFT)))
break;
/* This memory barrier is needed to keep us from reading
@@ -1220,7 +1214,7 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget)
rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
I40E_RXD_QW1_ERROR_SHIFT;
- rx_error &= ~(1 << I40E_RX_DESC_ERROR_HBO_SHIFT);
+ rx_error &= ~BIT(I40E_RX_DESC_ERROR_HBO_SHIFT);
rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
I40E_RXD_QW1_PTYPE_SHIFT;
@@ -1238,13 +1232,13 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget)
I40E_RX_INCREMENT(rx_ring, i);
if (unlikely(
- !(rx_status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT)))) {
+ !(rx_status & BIT(I40E_RX_DESC_STATUS_EOF_SHIFT)))) {
rx_ring->rx_stats.non_eop_descs++;
continue;
}
/* ERR_MASK will only have valid bits if EOP set */
- if (unlikely(rx_error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
+ if (unlikely(rx_error & BIT(I40E_RX_DESC_ERROR_RXE_SHIFT))) {
dev_kfree_skb_any(skb);
/* TODO: shouldn't we increment a counter indicating the
* drop?
@@ -1262,7 +1256,7 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget)
i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype);
- vlan_tag = rx_status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)
+ vlan_tag = rx_status & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)
? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1)
: 0;
i40e_receive_skb(rx_ring, skb, vlan_tag);
@@ -1281,6 +1275,67 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget)
}
/**
+ * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt
+ * @vsi: the VSI we care about
+ * @q_vector: q_vector for which itr is being updated and interrupt enabled
+ *
+ **/
+static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
+ struct i40e_q_vector *q_vector)
+{
+ struct i40e_hw *hw = &vsi->back->hw;
+ u16 old_itr;
+ int vector;
+ u32 val;
+
+ vector = (q_vector->v_idx + vsi->base_vector);
+ if (ITR_IS_DYNAMIC(vsi->rx_itr_setting)) {
+ old_itr = q_vector->rx.itr;
+ i40e_set_new_dynamic_itr(&q_vector->rx);
+ if (old_itr != q_vector->rx.itr) {
+ val = I40E_VFINT_DYN_CTLN1_INTENA_MASK |
+ I40E_VFINT_DYN_CTLN1_CLEARPBA_MASK |
+ (I40E_RX_ITR <<
+ I40E_VFINT_DYN_CTLN1_ITR_INDX_SHIFT) |
+ (q_vector->rx.itr <<
+ I40E_VFINT_DYN_CTLN1_INTERVAL_SHIFT);
+ } else {
+ val = I40E_VFINT_DYN_CTLN1_INTENA_MASK |
+ I40E_VFINT_DYN_CTLN1_CLEARPBA_MASK |
+ (I40E_ITR_NONE <<
+ I40E_VFINT_DYN_CTLN1_ITR_INDX_SHIFT);
+ }
+ if (!test_bit(__I40E_DOWN, &vsi->state))
+ wr32(hw, I40E_VFINT_DYN_CTLN1(vector - 1), val);
+ } else {
+ i40evf_irq_enable_queues(vsi->back, 1
+ << q_vector->v_idx);
+ }
+ if (ITR_IS_DYNAMIC(vsi->tx_itr_setting)) {
+ old_itr = q_vector->tx.itr;
+ i40e_set_new_dynamic_itr(&q_vector->tx);
+ if (old_itr != q_vector->tx.itr) {
+ val = I40E_VFINT_DYN_CTLN1_INTENA_MASK |
+ I40E_VFINT_DYN_CTLN1_CLEARPBA_MASK |
+ (I40E_TX_ITR <<
+ I40E_VFINT_DYN_CTLN1_ITR_INDX_SHIFT) |
+ (q_vector->tx.itr <<
+ I40E_VFINT_DYN_CTLN1_INTERVAL_SHIFT);
+
+ } else {
+ val = I40E_VFINT_DYN_CTLN1_INTENA_MASK |
+ I40E_VFINT_DYN_CTLN1_CLEARPBA_MASK |
+ (I40E_ITR_NONE <<
+ I40E_VFINT_DYN_CTLN1_ITR_INDX_SHIFT);
+ }
+ if (!test_bit(__I40E_DOWN, &vsi->state))
+ wr32(hw, I40E_VFINT_DYN_CTLN1(vector - 1), val);
+ } else {
+ i40evf_irq_enable_queues(vsi->back, BIT(q_vector->v_idx));
+ }
+}
+
+/**
* i40evf_napi_poll - NAPI polling Rx/Tx cleanup routine
* @napi: napi struct with our devices info in it
* @budget: amount of work driver is allowed to do this pass, in packets
@@ -1334,15 +1389,12 @@ int i40evf_napi_poll(struct napi_struct *napi, int budget)
return budget;
}
+ if (vsi->back->flags & I40E_TXR_FLAGS_WB_ON_ITR)
+ q_vector->arm_wb_state = false;
+
/* Work is done so exit the polling mode and re-enable the interrupt */
napi_complete(napi);
- if (ITR_IS_DYNAMIC(vsi->rx_itr_setting) ||
- ITR_IS_DYNAMIC(vsi->tx_itr_setting))
- i40e_update_dynamic_itr(q_vector);
-
- if (!test_bit(__I40E_DOWN, &vsi->state))
- i40evf_irq_enable_queues(vsi->back, 1 << q_vector->v_idx);
-
+ i40e_update_enable_itr(vsi, q_vector);
return 0;
}
@@ -1476,11 +1528,15 @@ static void i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags,
struct iphdr *this_ip_hdr;
u32 network_hdr_len;
u8 l4_hdr = 0;
+ struct udphdr *oudph;
+ struct iphdr *oiph;
u32 l4_tunnel = 0;
if (skb->encapsulation) {
switch (ip_hdr(skb)->protocol) {
case IPPROTO_UDP:
+ oudph = udp_hdr(skb);
+ oiph = ip_hdr(skb);
l4_tunnel = I40E_TXD_CTX_UDP_TUNNELING;
*tx_flags |= I40E_TX_FLAGS_VXLAN_TUNNEL;
break;
@@ -1519,6 +1575,15 @@ static void i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags,
}
+ if ((tx_ring->flags & I40E_TXR_FLAGS_OUTER_UDP_CSUM) &&
+ (l4_tunnel == I40E_TXD_CTX_UDP_TUNNELING) &&
+ (*cd_tunneling & I40E_TXD_CTX_QW0_EXT_IP_MASK)) {
+ oudph->check = ~csum_tcpudp_magic(oiph->saddr,
+ oiph->daddr,
+ (skb->len - skb_transport_offset(skb)),
+ IPPROTO_UDP, 0);
+ *cd_tunneling |= I40E_TXD_CTX_QW0_L4T_CS_MASK;
+ }
} else {
network_hdr_len = skb_network_header_len(skb);
this_ip_hdr = ip_hdr(skb);
@@ -1841,6 +1906,8 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
tx_ring->queue_index)))
writel(i, tx_ring->tail);
+ else
+ prefetchw(tx_desc + 1);
return;