From 6fa8f719844b8455033e295f720e739c1dc3804a Mon Sep 17 00:00:00 2001 From: Vladimir Sokolovsky Date: Wed, 14 Apr 2010 17:23:39 +0300 Subject: IB/mlx4: Add support for masked atomic operations Add support for masked atomic operations (masked compare and swap, masked fetch and add). Signed-off-by: Vladimir Sokolovsky Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/qp.c | 50 ++++++++++++++++++++++++++++++++--------- 1 file changed, 39 insertions(+), 11 deletions(-) (limited to 'drivers/infiniband/hw/mlx4/qp.c') diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 5643f4a8ffef..6a60827b2301 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -74,17 +74,19 @@ enum { }; static const __be32 mlx4_ib_opcode[] = { - [IB_WR_SEND] = cpu_to_be32(MLX4_OPCODE_SEND), - [IB_WR_LSO] = cpu_to_be32(MLX4_OPCODE_LSO), - [IB_WR_SEND_WITH_IMM] = cpu_to_be32(MLX4_OPCODE_SEND_IMM), - [IB_WR_RDMA_WRITE] = cpu_to_be32(MLX4_OPCODE_RDMA_WRITE), - [IB_WR_RDMA_WRITE_WITH_IMM] = cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM), - [IB_WR_RDMA_READ] = cpu_to_be32(MLX4_OPCODE_RDMA_READ), - [IB_WR_ATOMIC_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_ATOMIC_CS), - [IB_WR_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_ATOMIC_FA), - [IB_WR_SEND_WITH_INV] = cpu_to_be32(MLX4_OPCODE_SEND_INVAL), - [IB_WR_LOCAL_INV] = cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL), - [IB_WR_FAST_REG_MR] = cpu_to_be32(MLX4_OPCODE_FMR), + [IB_WR_SEND] = cpu_to_be32(MLX4_OPCODE_SEND), + [IB_WR_LSO] = cpu_to_be32(MLX4_OPCODE_LSO), + [IB_WR_SEND_WITH_IMM] = cpu_to_be32(MLX4_OPCODE_SEND_IMM), + [IB_WR_RDMA_WRITE] = cpu_to_be32(MLX4_OPCODE_RDMA_WRITE), + [IB_WR_RDMA_WRITE_WITH_IMM] = cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM), + [IB_WR_RDMA_READ] = cpu_to_be32(MLX4_OPCODE_RDMA_READ), + [IB_WR_ATOMIC_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_ATOMIC_CS), + [IB_WR_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_ATOMIC_FA), + [IB_WR_SEND_WITH_INV] = cpu_to_be32(MLX4_OPCODE_SEND_INVAL), + [IB_WR_LOCAL_INV] = cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL), + [IB_WR_FAST_REG_MR] = cpu_to_be32(MLX4_OPCODE_FMR), + [IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_CS), + [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA), }; static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp) @@ -1407,6 +1409,9 @@ static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ib_send_wr * if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap); aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add); + } else if (wr->opcode == IB_WR_MASKED_ATOMIC_FETCH_AND_ADD) { + aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add); + aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add_mask); } else { aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add); aseg->compare = 0; @@ -1414,6 +1419,15 @@ static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ib_send_wr * } +static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg, + struct ib_send_wr *wr) +{ + aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap); + aseg->swap_add_mask = cpu_to_be64(wr->wr.atomic.swap_mask); + aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add); + aseg->compare_mask = cpu_to_be64(wr->wr.atomic.compare_add_mask); +} + static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg, struct ib_send_wr *wr) { @@ -1567,6 +1581,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, switch (wr->opcode) { case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: + case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD: set_raddr_seg(wqe, wr->wr.atomic.remote_addr, wr->wr.atomic.rkey); wqe += sizeof (struct mlx4_wqe_raddr_seg); @@ -1579,6 +1594,19 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; + case IB_WR_MASKED_ATOMIC_CMP_AND_SWP: + set_raddr_seg(wqe, wr->wr.atomic.remote_addr, + wr->wr.atomic.rkey); + wqe += sizeof (struct mlx4_wqe_raddr_seg); + + set_masked_atomic_seg(wqe, wr); + wqe += sizeof (struct mlx4_wqe_masked_atomic_seg); + + size += (sizeof (struct mlx4_wqe_raddr_seg) + + sizeof (struct mlx4_wqe_masked_atomic_seg)) / 16; + + break; + case IB_WR_RDMA_READ: case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: -- cgit v1.2.3