diff options
author | Brenden Blanco <bblanco@plumgrid.com> | 2016-07-19 12:16:55 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2016-07-19 21:46:33 -0700 |
commit | 9ecc2d86171adf23796133c89610987a14624875 (patch) | |
tree | ff803acdc2762d33fafe18be3d78daa29d3a70f0 /drivers/net/ethernet/mellanox/mlx4/en_rx.c | |
parent | 224e92e02a769b8028ca2450443586af8b4f1715 (diff) |
net/mlx4_en: add xdp forwarding and data write support
A user will now be able to loop packets back out of the same port using
a bpf program attached to xdp hook. Updates to the packet contents from
the bpf program is also supported.
For the packet write feature to work, the rx buffers are now mapped as
bidirectional when the page is allocated. This occurs only when the xdp
hook is active.
When the program returns a TX action, enqueue the packet directly to a
dedicated tx ring, so as to avoid completely any locking. This requires
the tx ring to be allocated 1:1 for each rx ring, as well as the tx
completion running in the same softirq.
Upon tx completion, this dedicated tx ring recycles pages without
unmapping directly back to the original rx ring. In steady state tx/drop
workload, effectively 0 page allocs/frees will occur.
In order to separate out the paths between free and recycle, a
free_tx_desc func pointer is introduced that is optionally updated
whenever recycle_ring is activated. By default the original free
function is always initialized.
Signed-off-by: Brenden Blanco <bblanco@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx4/en_rx.c')
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx4/en_rx.c | 14 |
1 files changed, 14 insertions, 0 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 9dd5dc19a537..11d88c817137 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -783,7 +783,9 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud struct mlx4_en_rx_alloc *frags; struct mlx4_en_rx_desc *rx_desc; struct bpf_prog *xdp_prog; + int doorbell_pending; struct sk_buff *skb; + int tx_index; int index; int nr; unsigned int length; @@ -800,6 +802,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud return polled; xdp_prog = READ_ONCE(ring->xdp_prog); + doorbell_pending = 0; + tx_index = (priv->tx_ring_num - priv->xdp_ring_num) + cq->ring; /* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx * descriptor offset can be deduced from the CQE index instead of @@ -898,6 +902,12 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud switch (act) { case XDP_PASS: break; + case XDP_TX: + if (!mlx4_en_xmit_frame(frags, dev, + length, tx_index, + &doorbell_pending)) + goto consumed; + break; default: bpf_warn_invalid_xdp_action(act); case XDP_ABORTED: @@ -1068,6 +1078,9 @@ consumed: } out: + if (doorbell_pending) + mlx4_en_xmit_doorbell(priv->tx_ring[tx_index]); + AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled); mlx4_cq_set_ci(&cq->mcq); wmb(); /* ensure HW sees CQ consumer before we post new buffers */ @@ -1147,6 +1160,7 @@ void mlx4_en_calc_rx_buf(struct net_device *dev) * This only works when num_frags == 1. */ if (priv->xdp_ring_num) { + dma_dir = PCI_DMA_BIDIRECTIONAL; /* This will gain efficient xdp frame recycling at the expense * of more costly truesize accounting */ |