From 9f11a0ab6a9650de4b458b2a8759a32c2e4e89f0 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Wed, 21 Dec 2016 11:28:49 +0100
Subject: net: mvpp2: fix dma unmapping of TX buffers for fragments

commit 8354491c9d5b06709384cea91d13019bf5e61449 upstream.

Since commit 71ce391dfb784 ("net: mvpp2: enable proper per-CPU TX
buffers unmapping"), we are not correctly DMA unmapping TX buffers for
fragments.

Indeed, the mvpp2_txq_inc_put() function only stores in the
txq_cpu->tx_buffs[] array the physical address of the buffer to be
DMA-unmapped when skb != NULL. In addition, when DMA-unmapping, we use
skb_headlen(skb) to get the size to be unmapped. Both of this works fine
for TX descriptors that are associated directly to a SKB, but not the
ones that are used for fragments, with a NULL pointer as skb:

 - We have a NULL physical address when calling DMA unmap
 - skb_headlen(skb) crashes because skb is NULL

This causes random crashes when fragments are used.

To solve this problem, we need to:

 - Store the physical address of the buffer to be unmapped
   unconditionally, regardless of whether it is tied to a SKB or not.

 - Store the length of the buffer to be unmapped, which requires a new
   field.

Instead of adding a third array to store the length of the buffer to be
unmapped, and as suggested by David Miller, this commit refactors the
tx_buffs[] and tx_skb[] arrays of 'struct mvpp2_txq_pcpu' into a
separate structure 'mvpp2_txq_pcpu_buf', to which a 'size' field is
added. Therefore, instead of having three arrays to allocate/free, we
have a single one, which also improve data locality, reducing the
impact on the CPU cache.

Fixes: 71ce391dfb784 ("net: mvpp2: enable proper per-CPU TX buffers unmapping")
Reported-by: Raphael G <raphael.glon@corp.ovh.com>
Cc: Raphael G <raphael.glon@corp.ovh.com>
Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/marvell/mvpp2.c | 59 ++++++++++++++++++------------------
 1 file changed, 30 insertions(+), 29 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index a4beccf1fd46..25aba9886990 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -772,6 +772,17 @@ struct mvpp2_rx_desc {
 	u32 reserved8;
 };
 
+struct mvpp2_txq_pcpu_buf {
+	/* Transmitted SKB */
+	struct sk_buff *skb;
+
+	/* Physical address of transmitted buffer */
+	dma_addr_t phys;
+
+	/* Size transmitted */
+	size_t size;
+};
+
 /* Per-CPU Tx queue control */
 struct mvpp2_txq_pcpu {
 	int cpu;
@@ -787,11 +798,8 @@ struct mvpp2_txq_pcpu {
 	/* Number of Tx DMA descriptors reserved for each CPU */
 	int reserved_num;
 
-	/* Array of transmitted skb */
-	struct sk_buff **tx_skb;
-
-	/* Array of transmitted buffers' physical addresses */
-	dma_addr_t *tx_buffs;
+	/* Infos about transmitted buffers */
+	struct mvpp2_txq_pcpu_buf *buffs;
 
 	/* Index of last TX DMA descriptor that was inserted */
 	int txq_put_index;
@@ -981,10 +989,11 @@ static void mvpp2_txq_inc_put(struct mvpp2_txq_pcpu *txq_pcpu,
 			      struct sk_buff *skb,
 			      struct mvpp2_tx_desc *tx_desc)
 {
-	txq_pcpu->tx_skb[txq_pcpu->txq_put_index] = skb;
-	if (skb)
-		txq_pcpu->tx_buffs[txq_pcpu->txq_put_index] =
-							 tx_desc->buf_phys_addr;
+	struct mvpp2_txq_pcpu_buf *tx_buf =
+		txq_pcpu->buffs + txq_pcpu->txq_put_index;
+	tx_buf->skb = skb;
+	tx_buf->size = tx_desc->data_size;
+	tx_buf->phys = tx_desc->buf_phys_addr;
 	txq_pcpu->txq_put_index++;
 	if (txq_pcpu->txq_put_index == txq_pcpu->size)
 		txq_pcpu->txq_put_index = 0;
@@ -4403,17 +4412,16 @@ static void mvpp2_txq_bufs_free(struct mvpp2_port *port,
 	int i;
 
 	for (i = 0; i < num; i++) {
-		dma_addr_t buf_phys_addr =
-				    txq_pcpu->tx_buffs[txq_pcpu->txq_get_index];
-		struct sk_buff *skb = txq_pcpu->tx_skb[txq_pcpu->txq_get_index];
+		struct mvpp2_txq_pcpu_buf *tx_buf =
+			txq_pcpu->buffs + txq_pcpu->txq_get_index;
 
 		mvpp2_txq_inc_get(txq_pcpu);
 
-		dma_unmap_single(port->dev->dev.parent, buf_phys_addr,
-				 skb_headlen(skb), DMA_TO_DEVICE);
-		if (!skb)
+		dma_unmap_single(port->dev->dev.parent, tx_buf->phys,
+				 tx_buf->size, DMA_TO_DEVICE);
+		if (!tx_buf->skb)
 			continue;
-		dev_kfree_skb_any(skb);
+		dev_kfree_skb_any(tx_buf->skb);
 	}
 }
 
@@ -4664,15 +4672,10 @@ static int mvpp2_txq_init(struct mvpp2_port *port,
 	for_each_present_cpu(cpu) {
 		txq_pcpu = per_cpu_ptr(txq->pcpu, cpu);
 		txq_pcpu->size = txq->size;
-		txq_pcpu->tx_skb = kmalloc(txq_pcpu->size *
-					   sizeof(*txq_pcpu->tx_skb),
-					   GFP_KERNEL);
-		if (!txq_pcpu->tx_skb)
-			goto error;
-
-		txq_pcpu->tx_buffs = kmalloc(txq_pcpu->size *
-					     sizeof(dma_addr_t), GFP_KERNEL);
-		if (!txq_pcpu->tx_buffs)
+		txq_pcpu->buffs = kmalloc(txq_pcpu->size *
+					  sizeof(struct mvpp2_txq_pcpu_buf),
+					  GFP_KERNEL);
+		if (!txq_pcpu->buffs)
 			goto error;
 
 		txq_pcpu->count = 0;
@@ -4686,8 +4689,7 @@ static int mvpp2_txq_init(struct mvpp2_port *port,
 error:
 	for_each_present_cpu(cpu) {
 		txq_pcpu = per_cpu_ptr(txq->pcpu, cpu);
-		kfree(txq_pcpu->tx_skb);
-		kfree(txq_pcpu->tx_buffs);
+		kfree(txq_pcpu->buffs);
 	}
 
 	dma_free_coherent(port->dev->dev.parent,
@@ -4706,8 +4708,7 @@ static void mvpp2_txq_deinit(struct mvpp2_port *port,
 
 	for_each_present_cpu(cpu) {
 		txq_pcpu = per_cpu_ptr(txq->pcpu, cpu);
-		kfree(txq_pcpu->tx_skb);
-		kfree(txq_pcpu->tx_buffs);
+		kfree(txq_pcpu->buffs);
 	}
 
 	if (txq->descs)
-- 
cgit v1.2.3