summaryrefslogtreecommitdiff
path: root/drivers/infiniband/sw
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/sw')
-rw-r--r--drivers/infiniband/sw/rdmavt/qp.c8
-rw-r--r--drivers/infiniband/sw/rxe/Kconfig1
-rw-r--r--drivers/infiniband/sw/rxe/rxe_mr.c2
-rw-r--r--drivers/infiniband/sw/rxe/rxe_net.c15
-rw-r--r--drivers/infiniband/sw/rxe/rxe_opcode.c2
-rw-r--r--drivers/infiniband/sw/rxe/rxe_param.h2
-rw-r--r--drivers/infiniband/sw/rxe/rxe_qp.c8
-rw-r--r--drivers/infiniband/sw/rxe/rxe_recv.c27
-rw-r--r--drivers/infiniband/sw/rxe/rxe_resp.c2
-rw-r--r--drivers/infiniband/sw/siw/siw.h9
-rw-r--r--drivers/infiniband/sw/siw/siw_cm.c7
-rw-r--r--drivers/infiniband/sw/siw/siw_main.c2
-rw-r--r--drivers/infiniband/sw/siw/siw_mem.c4
-rw-r--r--drivers/infiniband/sw/siw/siw_qp.c271
-rw-r--r--drivers/infiniband/sw/siw/siw_qp_rx.c46
-rw-r--r--drivers/infiniband/sw/siw/siw_qp_tx.c4
-rw-r--r--drivers/infiniband/sw/siw/siw_verbs.c29
17 files changed, 247 insertions, 192 deletions
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index d14ad523f96c..48e8612c1bc8 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -3110,6 +3110,8 @@ do_write:
case IB_WR_ATOMIC_FETCH_AND_ADD:
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
goto inv_err;
+ if (unlikely(wqe->atomic_wr.remote_addr & (sizeof(u64) - 1)))
+ goto inv_err;
if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
wqe->atomic_wr.remote_addr,
wqe->atomic_wr.rkey,
@@ -3225,7 +3227,11 @@ serr_no_r_lock:
spin_lock_irqsave(&sqp->s_lock, flags);
rvt_send_complete(sqp, wqe, send_status);
if (sqp->ibqp.qp_type == IB_QPT_RC) {
- int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
+ int lastwqe;
+
+ spin_lock(&sqp->r_lock);
+ lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
+ spin_unlock(&sqp->r_lock);
sqp->s_flags &= ~RVT_S_BUSY;
spin_unlock_irqrestore(&sqp->s_lock, flags);
diff --git a/drivers/infiniband/sw/rxe/Kconfig b/drivers/infiniband/sw/rxe/Kconfig
index 71a773f607bb..0e8f1d05dfb2 100644
--- a/drivers/infiniband/sw/rxe/Kconfig
+++ b/drivers/infiniband/sw/rxe/Kconfig
@@ -4,6 +4,7 @@ config RDMA_RXE
depends on INET && PCI && INFINIBAND
depends on INFINIBAND_VIRT_DMA
select NET_UDP_TUNNEL
+ select CRYPTO
select CRYPTO_CRC32
select DMA_VIRT_OPS
---help---
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index ffbc50341a55..f885e245699b 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -173,7 +173,7 @@ int rxe_mem_init_user(struct rxe_pd *pd, u64 start,
if (IS_ERR(umem)) {
pr_warn("err %d from rxe_umem_get\n",
(int)PTR_ERR(umem));
- err = -EINVAL;
+ err = PTR_ERR(umem);
goto err1;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 312c2fc961c0..7d1df737c655 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -251,10 +251,8 @@ static struct socket *rxe_setup_udp_tunnel(struct net *net, __be16 port,
/* Create UDP socket */
err = udp_sock_create(net, &udp_cfg, &sock);
- if (err < 0) {
- pr_err("failed to create udp socket. err = %d\n", err);
+ if (err < 0)
return ERR_PTR(err);
- }
tnl_cfg.encap_type = 1;
tnl_cfg.encap_rcv = rxe_udp_encap_recv;
@@ -453,6 +451,11 @@ int rxe_send(struct rxe_pkt_info *pkt, struct sk_buff *skb)
void rxe_loopback(struct sk_buff *skb)
{
+ if (skb->protocol == htons(ETH_P_IP))
+ skb_pull(skb, sizeof(struct iphdr));
+ else
+ skb_pull(skb, sizeof(struct ipv6hdr));
+
rxe_rcv(skb);
}
@@ -655,6 +658,12 @@ static int rxe_net_ipv6_init(void)
recv_sockets.sk6 = rxe_setup_udp_tunnel(&init_net,
htons(ROCE_V2_UDP_DPORT), true);
+ if (PTR_ERR(recv_sockets.sk6) == -EAFNOSUPPORT) {
+ recv_sockets.sk6 = NULL;
+ pr_warn("IPv6 is not supported, can not create a UDPv6 socket\n");
+ return 0;
+ }
+
if (IS_ERR(recv_sockets.sk6)) {
recv_sockets.sk6 = NULL;
pr_err("Failed to create IPv6 UDP tunnel\n");
diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.c b/drivers/infiniband/sw/rxe/rxe_opcode.c
index 4cf11063e0b5..0f166d6d0ccb 100644
--- a/drivers/infiniband/sw/rxe/rxe_opcode.c
+++ b/drivers/infiniband/sw/rxe/rxe_opcode.c
@@ -137,7 +137,7 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = {
}
},
[IB_OPCODE_RC_SEND_MIDDLE] = {
- .name = "IB_OPCODE_RC_SEND_MIDDLE]",
+ .name = "IB_OPCODE_RC_SEND_MIDDLE",
.mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_SEND_MASK
| RXE_MIDDLE_MASK,
.length = RXE_BTH_BYTES,
diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h
index fe5207386700..8d3f6d93dfb8 100644
--- a/drivers/infiniband/sw/rxe/rxe_param.h
+++ b/drivers/infiniband/sw/rxe/rxe_param.h
@@ -140,7 +140,7 @@ enum rxe_device_param {
/* default/initial rxe port parameters */
enum rxe_port_param {
RXE_PORT_GID_TBL_LEN = 1024,
- RXE_PORT_PORT_CAP_FLAGS = RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP,
+ RXE_PORT_PORT_CAP_FLAGS = IB_PORT_CM_SUP,
RXE_PORT_MAX_MSG_SZ = 0x800000,
RXE_PORT_BAD_PKEY_CNTR = 0,
RXE_PORT_QKEY_VIOL_CNTR = 0,
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index f85273883794..53166b9ae67e 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -152,7 +152,6 @@ static void free_rd_atomic_resources(struct rxe_qp *qp)
void free_rd_atomic_resource(struct rxe_qp *qp, struct resp_res *res)
{
if (res->type == RXE_ATOMIC_MASK) {
- rxe_drop_ref(qp);
kfree_skb(res->atomic.skb);
} else if (res->type == RXE_READ_MASK) {
if (res->read.mr)
@@ -260,6 +259,7 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
if (err) {
vfree(qp->sq.queue->buf);
kfree(qp->sq.queue);
+ qp->sq.queue = NULL;
return err;
}
@@ -313,6 +313,7 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp,
if (err) {
vfree(qp->rq.queue->buf);
kfree(qp->rq.queue);
+ qp->rq.queue = NULL;
return err;
}
}
@@ -373,6 +374,11 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd,
err2:
rxe_queue_cleanup(qp->sq.queue);
err1:
+ qp->pd = NULL;
+ qp->rcq = NULL;
+ qp->scq = NULL;
+ qp->srq = NULL;
+
if (srq)
rxe_drop_ref(srq);
rxe_drop_ref(scq);
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
index 9bfb98056fc2..369ba76f1605 100644
--- a/drivers/infiniband/sw/rxe/rxe_recv.c
+++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -36,21 +36,26 @@
#include "rxe.h"
#include "rxe_loc.h"
+/* check that QP matches packet opcode type and is in a valid state */
static int check_type_state(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
struct rxe_qp *qp)
{
+ unsigned int pkt_type;
+
if (unlikely(!qp->valid))
goto err1;
+ pkt_type = pkt->opcode & 0xe0;
+
switch (qp_type(qp)) {
case IB_QPT_RC:
- if (unlikely((pkt->opcode & IB_OPCODE_RC) != 0)) {
+ if (unlikely(pkt_type != IB_OPCODE_RC)) {
pr_warn_ratelimited("bad qp type\n");
goto err1;
}
break;
case IB_QPT_UC:
- if (unlikely(!(pkt->opcode & IB_OPCODE_UC))) {
+ if (unlikely(pkt_type != IB_OPCODE_UC)) {
pr_warn_ratelimited("bad qp type\n");
goto err1;
}
@@ -58,7 +63,7 @@ static int check_type_state(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
case IB_QPT_UD:
case IB_QPT_SMI:
case IB_QPT_GSI:
- if (unlikely(!(pkt->opcode & IB_OPCODE_UD))) {
+ if (unlikely(pkt_type != IB_OPCODE_UD)) {
pr_warn_ratelimited("bad qp type\n");
goto err1;
}
@@ -300,7 +305,6 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
list_for_each_entry(mce, &mcg->qp_list, qp_list) {
qp = mce->qp;
- pkt = SKB_TO_PKT(skb);
/* validate qp for incoming packet */
err = check_type_state(rxe, pkt, qp);
@@ -312,12 +316,18 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
continue;
/* for all but the last qp create a new clone of the
- * skb and pass to the qp.
+ * skb and pass to the qp. If an error occurs in the
+ * checks for the last qp in the list we need to
+ * free the skb since it hasn't been passed on to
+ * rxe_rcv_pkt() which would free it later.
*/
- if (mce->qp_list.next != &mcg->qp_list)
+ if (mce->qp_list.next != &mcg->qp_list) {
per_qp_skb = skb_clone(skb, GFP_ATOMIC);
- else
+ } else {
per_qp_skb = skb;
+ /* show we have consumed the skb */
+ skb = NULL;
+ }
if (unlikely(!per_qp_skb))
continue;
@@ -332,9 +342,8 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
rxe_drop_ref(mcg); /* drop ref from rxe_pool_get_key. */
- return;
-
err1:
+ /* free skb if not consumed */
kfree_skb(skb);
}
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index c4a8195bf670..186152bf7951 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -993,8 +993,6 @@ static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
goto out;
}
- rxe_add_ref(qp);
-
res = &qp->resp.resources[qp->resp.res_head];
free_rd_atomic_resource(qp, res);
rxe_advance_resp_resource(qp);
diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h
index dba4535494ab..eaa9863dd1e0 100644
--- a/drivers/infiniband/sw/siw/siw.h
+++ b/drivers/infiniband/sw/siw/siw.h
@@ -658,16 +658,11 @@ static inline struct siw_sqe *orq_get_current(struct siw_qp *qp)
return &qp->orq[qp->orq_get % qp->attrs.orq_size];
}
-static inline struct siw_sqe *orq_get_tail(struct siw_qp *qp)
-{
- return &qp->orq[qp->orq_put % qp->attrs.orq_size];
-}
-
static inline struct siw_sqe *orq_get_free(struct siw_qp *qp)
{
- struct siw_sqe *orq_e = orq_get_tail(qp);
+ struct siw_sqe *orq_e = &qp->orq[qp->orq_put % qp->attrs.orq_size];
- if (orq_e && READ_ONCE(orq_e->flags) == 0)
+ if (READ_ONCE(orq_e->flags) == 0)
return orq_e;
return NULL;
diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c
index e3bac1a877bb..3aed597103d3 100644
--- a/drivers/infiniband/sw/siw/siw_cm.c
+++ b/drivers/infiniband/sw/siw/siw_cm.c
@@ -976,14 +976,15 @@ static void siw_accept_newconn(struct siw_cep *cep)
siw_cep_set_inuse(new_cep);
rv = siw_proc_mpareq(new_cep);
- siw_cep_set_free(new_cep);
-
if (rv != -EAGAIN) {
siw_cep_put(cep);
new_cep->listen_cep = NULL;
- if (rv)
+ if (rv) {
+ siw_cep_set_free(new_cep);
goto error;
+ }
}
+ siw_cep_set_free(new_cep);
}
return;
diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c
index fb66d6757278..dbbf8c6c16d3 100644
--- a/drivers/infiniband/sw/siw/siw_main.c
+++ b/drivers/infiniband/sw/siw/siw_main.c
@@ -134,7 +134,7 @@ static struct {
static int siw_init_cpulist(void)
{
- int i, num_nodes = num_possible_nodes();
+ int i, num_nodes = nr_node_ids;
memset(siw_tx_thread, 0, sizeof(siw_tx_thread));
diff --git a/drivers/infiniband/sw/siw/siw_mem.c b/drivers/infiniband/sw/siw/siw_mem.c
index e99983f07663..8bffa6e013fc 100644
--- a/drivers/infiniband/sw/siw/siw_mem.c
+++ b/drivers/infiniband/sw/siw/siw_mem.c
@@ -106,8 +106,6 @@ int siw_mr_add_mem(struct siw_mr *mr, struct ib_pd *pd, void *mem_obj,
mem->perms = rights & IWARP_ACCESS_MASK;
kref_init(&mem->ref);
- mr->mem = mem;
-
get_random_bytes(&next, 4);
next &= 0x00ffffff;
@@ -116,6 +114,8 @@ int siw_mr_add_mem(struct siw_mr *mr, struct ib_pd *pd, void *mem_obj,
kfree(mem);
return -ENOMEM;
}
+
+ mr->mem = mem;
/* Set the STag index part */
mem->stag = id << 8;
mr->base_mr.lkey = mr->base_mr.rkey = mem->stag;
diff --git a/drivers/infiniband/sw/siw/siw_qp.c b/drivers/infiniband/sw/siw/siw_qp.c
index b4317480cee7..5927ac5923dd 100644
--- a/drivers/infiniband/sw/siw/siw_qp.c
+++ b/drivers/infiniband/sw/siw/siw_qp.c
@@ -199,26 +199,26 @@ void siw_qp_llp_write_space(struct sock *sk)
static int siw_qp_readq_init(struct siw_qp *qp, int irq_size, int orq_size)
{
- irq_size = roundup_pow_of_two(irq_size);
- orq_size = roundup_pow_of_two(orq_size);
-
- qp->attrs.irq_size = irq_size;
- qp->attrs.orq_size = orq_size;
-
- qp->irq = vzalloc(irq_size * sizeof(struct siw_sqe));
- if (!qp->irq) {
- siw_dbg_qp(qp, "irq malloc for %d failed\n", irq_size);
- qp->attrs.irq_size = 0;
- return -ENOMEM;
+ if (irq_size) {
+ irq_size = roundup_pow_of_two(irq_size);
+ qp->irq = vzalloc(irq_size * sizeof(struct siw_sqe));
+ if (!qp->irq) {
+ qp->attrs.irq_size = 0;
+ return -ENOMEM;
+ }
}
- qp->orq = vzalloc(orq_size * sizeof(struct siw_sqe));
- if (!qp->orq) {
- siw_dbg_qp(qp, "orq malloc for %d failed\n", orq_size);
- qp->attrs.orq_size = 0;
- qp->attrs.irq_size = 0;
- vfree(qp->irq);
- return -ENOMEM;
+ if (orq_size) {
+ orq_size = roundup_pow_of_two(orq_size);
+ qp->orq = vzalloc(orq_size * sizeof(struct siw_sqe));
+ if (!qp->orq) {
+ qp->attrs.orq_size = 0;
+ qp->attrs.irq_size = 0;
+ vfree(qp->irq);
+ return -ENOMEM;
+ }
}
+ qp->attrs.irq_size = irq_size;
+ qp->attrs.orq_size = orq_size;
siw_dbg_qp(qp, "ORD %d, IRD %d\n", orq_size, irq_size);
return 0;
}
@@ -288,13 +288,14 @@ int siw_qp_mpa_rts(struct siw_qp *qp, enum mpa_v2_ctrl ctrl)
if (ctrl & MPA_V2_RDMA_WRITE_RTR)
wqe->sqe.opcode = SIW_OP_WRITE;
else if (ctrl & MPA_V2_RDMA_READ_RTR) {
- struct siw_sqe *rreq;
+ struct siw_sqe *rreq = NULL;
wqe->sqe.opcode = SIW_OP_READ;
spin_lock(&qp->orq_lock);
- rreq = orq_get_free(qp);
+ if (qp->attrs.orq_size)
+ rreq = orq_get_free(qp);
if (rreq) {
siw_read_to_orq(rreq, &wqe->sqe);
qp->orq_put++;
@@ -877,135 +878,88 @@ void siw_read_to_orq(struct siw_sqe *rreq, struct siw_sqe *sqe)
rreq->num_sge = 1;
}
-/*
- * Must be called with SQ locked.
- * To avoid complete SQ starvation by constant inbound READ requests,
- * the active IRQ will not be served after qp->irq_burst, if the
- * SQ has pending work.
- */
-int siw_activate_tx(struct siw_qp *qp)
+static int siw_activate_tx_from_sq(struct siw_qp *qp)
{
- struct siw_sqe *irqe, *sqe;
+ struct siw_sqe *sqe;
struct siw_wqe *wqe = tx_wqe(qp);
int rv = 1;
- irqe = &qp->irq[qp->irq_get % qp->attrs.irq_size];
-
- if (irqe->flags & SIW_WQE_VALID) {
- sqe = sq_get_next(qp);
-
- /*
- * Avoid local WQE processing starvation in case
- * of constant inbound READ request stream
- */
- if (sqe && ++qp->irq_burst >= SIW_IRQ_MAXBURST_SQ_ACTIVE) {
- qp->irq_burst = 0;
- goto skip_irq;
- }
- memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE);
- wqe->wr_status = SIW_WR_QUEUED;
-
- /* start READ RESPONSE */
- wqe->sqe.opcode = SIW_OP_READ_RESPONSE;
- wqe->sqe.flags = 0;
- if (irqe->num_sge) {
- wqe->sqe.num_sge = 1;
- wqe->sqe.sge[0].length = irqe->sge[0].length;
- wqe->sqe.sge[0].laddr = irqe->sge[0].laddr;
- wqe->sqe.sge[0].lkey = irqe->sge[0].lkey;
- } else {
- wqe->sqe.num_sge = 0;
- }
-
- /* Retain original RREQ's message sequence number for
- * potential error reporting cases.
- */
- wqe->sqe.sge[1].length = irqe->sge[1].length;
-
- wqe->sqe.rkey = irqe->rkey;
- wqe->sqe.raddr = irqe->raddr;
+ sqe = sq_get_next(qp);
+ if (!sqe)
+ return 0;
- wqe->processed = 0;
- qp->irq_get++;
+ memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE);
+ wqe->wr_status = SIW_WR_QUEUED;
- /* mark current IRQ entry free */
- smp_store_mb(irqe->flags, 0);
+ /* First copy SQE to kernel private memory */
+ memcpy(&wqe->sqe, sqe, sizeof(*sqe));
+ if (wqe->sqe.opcode >= SIW_NUM_OPCODES) {
+ rv = -EINVAL;
goto out;
}
- sqe = sq_get_next(qp);
- if (sqe) {
-skip_irq:
- memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE);
- wqe->wr_status = SIW_WR_QUEUED;
-
- /* First copy SQE to kernel private memory */
- memcpy(&wqe->sqe, sqe, sizeof(*sqe));
-
- if (wqe->sqe.opcode >= SIW_NUM_OPCODES) {
+ if (wqe->sqe.flags & SIW_WQE_INLINE) {
+ if (wqe->sqe.opcode != SIW_OP_SEND &&
+ wqe->sqe.opcode != SIW_OP_WRITE) {
rv = -EINVAL;
goto out;
}
- if (wqe->sqe.flags & SIW_WQE_INLINE) {
- if (wqe->sqe.opcode != SIW_OP_SEND &&
- wqe->sqe.opcode != SIW_OP_WRITE) {
- rv = -EINVAL;
- goto out;
- }
- if (wqe->sqe.sge[0].length > SIW_MAX_INLINE) {
- rv = -EINVAL;
- goto out;
- }
- wqe->sqe.sge[0].laddr = (uintptr_t)&wqe->sqe.sge[1];
- wqe->sqe.sge[0].lkey = 0;
- wqe->sqe.num_sge = 1;
+ if (wqe->sqe.sge[0].length > SIW_MAX_INLINE) {
+ rv = -EINVAL;
+ goto out;
}
- if (wqe->sqe.flags & SIW_WQE_READ_FENCE) {
- /* A READ cannot be fenced */
- if (unlikely(wqe->sqe.opcode == SIW_OP_READ ||
- wqe->sqe.opcode ==
- SIW_OP_READ_LOCAL_INV)) {
- siw_dbg_qp(qp, "cannot fence read\n");
- rv = -EINVAL;
- goto out;
- }
- spin_lock(&qp->orq_lock);
+ wqe->sqe.sge[0].laddr = (uintptr_t)&wqe->sqe.sge[1];
+ wqe->sqe.sge[0].lkey = 0;
+ wqe->sqe.num_sge = 1;
+ }
+ if (wqe->sqe.flags & SIW_WQE_READ_FENCE) {
+ /* A READ cannot be fenced */
+ if (unlikely(wqe->sqe.opcode == SIW_OP_READ ||
+ wqe->sqe.opcode ==
+ SIW_OP_READ_LOCAL_INV)) {
+ siw_dbg_qp(qp, "cannot fence read\n");
+ rv = -EINVAL;
+ goto out;
+ }
+ spin_lock(&qp->orq_lock);
- if (!siw_orq_empty(qp)) {
- qp->tx_ctx.orq_fence = 1;
- rv = 0;
- }
- spin_unlock(&qp->orq_lock);
+ if (qp->attrs.orq_size && !siw_orq_empty(qp)) {
+ qp->tx_ctx.orq_fence = 1;
+ rv = 0;
+ }
+ spin_unlock(&qp->orq_lock);
- } else if (wqe->sqe.opcode == SIW_OP_READ ||
- wqe->sqe.opcode == SIW_OP_READ_LOCAL_INV) {
- struct siw_sqe *rreq;
+ } else if (wqe->sqe.opcode == SIW_OP_READ ||
+ wqe->sqe.opcode == SIW_OP_READ_LOCAL_INV) {
+ struct siw_sqe *rreq;
- wqe->sqe.num_sge = 1;
+ if (unlikely(!qp->attrs.orq_size)) {
+ /* We negotiated not to send READ req's */
+ rv = -EINVAL;
+ goto out;
+ }
+ wqe->sqe.num_sge = 1;
- spin_lock(&qp->orq_lock);
+ spin_lock(&qp->orq_lock);
- rreq = orq_get_free(qp);
- if (rreq) {
- /*
- * Make an immediate copy in ORQ to be ready
- * to process loopback READ reply
- */
- siw_read_to_orq(rreq, &wqe->sqe);
- qp->orq_put++;
- } else {
- qp->tx_ctx.orq_fence = 1;
- rv = 0;
- }
- spin_unlock(&qp->orq_lock);
+ rreq = orq_get_free(qp);
+ if (rreq) {
+ /*
+ * Make an immediate copy in ORQ to be ready
+ * to process loopback READ reply
+ */
+ siw_read_to_orq(rreq, &wqe->sqe);
+ qp->orq_put++;
+ } else {
+ qp->tx_ctx.orq_fence = 1;
+ rv = 0;
}
-
- /* Clear SQE, can be re-used by application */
- smp_store_mb(sqe->flags, 0);
- qp->sq_get++;
- } else {
- rv = 0;
+ spin_unlock(&qp->orq_lock);
}
+
+ /* Clear SQE, can be re-used by application */
+ smp_store_mb(sqe->flags, 0);
+ qp->sq_get++;
out:
if (unlikely(rv < 0)) {
siw_dbg_qp(qp, "error %d\n", rv);
@@ -1015,6 +969,65 @@ out:
}
/*
+ * Must be called with SQ locked.
+ * To avoid complete SQ starvation by constant inbound READ requests,
+ * the active IRQ will not be served after qp->irq_burst, if the
+ * SQ has pending work.
+ */
+int siw_activate_tx(struct siw_qp *qp)
+{
+ struct siw_sqe *irqe;
+ struct siw_wqe *wqe = tx_wqe(qp);
+
+ if (!qp->attrs.irq_size)
+ return siw_activate_tx_from_sq(qp);
+
+ irqe = &qp->irq[qp->irq_get % qp->attrs.irq_size];
+
+ if (!(irqe->flags & SIW_WQE_VALID))
+ return siw_activate_tx_from_sq(qp);
+
+ /*
+ * Avoid local WQE processing starvation in case
+ * of constant inbound READ request stream
+ */
+ if (sq_get_next(qp) && ++qp->irq_burst >= SIW_IRQ_MAXBURST_SQ_ACTIVE) {
+ qp->irq_burst = 0;
+ return siw_activate_tx_from_sq(qp);
+ }
+ memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE);
+ wqe->wr_status = SIW_WR_QUEUED;
+
+ /* start READ RESPONSE */
+ wqe->sqe.opcode = SIW_OP_READ_RESPONSE;
+ wqe->sqe.flags = 0;
+ if (irqe->num_sge) {
+ wqe->sqe.num_sge = 1;
+ wqe->sqe.sge[0].length = irqe->sge[0].length;
+ wqe->sqe.sge[0].laddr = irqe->sge[0].laddr;
+ wqe->sqe.sge[0].lkey = irqe->sge[0].lkey;
+ } else {
+ wqe->sqe.num_sge = 0;
+ }
+
+ /* Retain original RREQ's message sequence number for
+ * potential error reporting cases.
+ */
+ wqe->sqe.sge[1].length = irqe->sge[1].length;
+
+ wqe->sqe.rkey = irqe->rkey;
+ wqe->sqe.raddr = irqe->raddr;
+
+ wqe->processed = 0;
+ qp->irq_get++;
+
+ /* mark current IRQ entry free */
+ smp_store_mb(irqe->flags, 0);
+
+ return 1;
+}
+
+/*
* Check if current CQ state qualifies for calling CQ completion
* handler. Must be called with CQ lock held.
*/
diff --git a/drivers/infiniband/sw/siw/siw_qp_rx.c b/drivers/infiniband/sw/siw/siw_qp_rx.c
index 0520e70084f9..5f94c716301f 100644
--- a/drivers/infiniband/sw/siw/siw_qp_rx.c
+++ b/drivers/infiniband/sw/siw/siw_qp_rx.c
@@ -680,6 +680,10 @@ static int siw_init_rresp(struct siw_qp *qp, struct siw_rx_stream *srx)
}
spin_lock_irqsave(&qp->sq_lock, flags);
+ if (unlikely(!qp->attrs.irq_size)) {
+ run_sq = 0;
+ goto error_irq;
+ }
if (tx_work->wr_status == SIW_WR_IDLE) {
/*
* immediately schedule READ response w/o
@@ -712,8 +716,9 @@ static int siw_init_rresp(struct siw_qp *qp, struct siw_rx_stream *srx)
/* RRESP now valid as current TX wqe or placed into IRQ */
smp_store_mb(resp->flags, SIW_WQE_VALID);
} else {
- pr_warn("siw: [QP %u]: irq %d exceeded %d\n", qp_id(qp),
- qp->irq_put % qp->attrs.irq_size, qp->attrs.irq_size);
+error_irq:
+ pr_warn("siw: [QP %u]: IRQ exceeded or null, size %d\n",
+ qp_id(qp), qp->attrs.irq_size);
siw_init_terminate(qp, TERM_ERROR_LAYER_RDMAP,
RDMAP_ETYPE_REMOTE_OPERATION,
@@ -740,6 +745,9 @@ static int siw_orqe_start_rx(struct siw_qp *qp)
struct siw_sqe *orqe;
struct siw_wqe *wqe = NULL;
+ if (unlikely(!qp->attrs.orq_size))
+ return -EPROTO;
+
/* make sure ORQ indices are current */
smp_mb();
@@ -796,8 +804,8 @@ int siw_proc_rresp(struct siw_qp *qp)
*/
rv = siw_orqe_start_rx(qp);
if (rv) {
- pr_warn("siw: [QP %u]: ORQ empty at idx %d\n",
- qp_id(qp), qp->orq_get % qp->attrs.orq_size);
+ pr_warn("siw: [QP %u]: ORQ empty, size %d\n",
+ qp_id(qp), qp->attrs.orq_size);
goto error_term;
}
rv = siw_rresp_check_ntoh(srx, frx);
@@ -1145,11 +1153,12 @@ static int siw_check_tx_fence(struct siw_qp *qp)
spin_lock_irqsave(&qp->orq_lock, flags);
- rreq = orq_get_current(qp);
-
/* free current orq entry */
+ rreq = orq_get_current(qp);
WRITE_ONCE(rreq->flags, 0);
+ qp->orq_get++;
+
if (qp->tx_ctx.orq_fence) {
if (unlikely(tx_waiting->wr_status != SIW_WR_QUEUED)) {
pr_warn("siw: [QP %u]: fence resume: bad status %d\n",
@@ -1157,10 +1166,12 @@ static int siw_check_tx_fence(struct siw_qp *qp)
rv = -EPROTO;
goto out;
}
- /* resume SQ processing */
+ /* resume SQ processing, if possible */
if (tx_waiting->sqe.opcode == SIW_OP_READ ||
tx_waiting->sqe.opcode == SIW_OP_READ_LOCAL_INV) {
- rreq = orq_get_tail(qp);
+
+ /* SQ processing was stopped because of a full ORQ */
+ rreq = orq_get_free(qp);
if (unlikely(!rreq)) {
pr_warn("siw: [QP %u]: no ORQE\n", qp_id(qp));
rv = -EPROTO;
@@ -1173,15 +1184,14 @@ static int siw_check_tx_fence(struct siw_qp *qp)
resume_tx = 1;
} else if (siw_orq_empty(qp)) {
+ /*
+ * SQ processing was stopped by fenced work request.
+ * Resume since all previous Read's are now completed.
+ */
qp->tx_ctx.orq_fence = 0;
resume_tx = 1;
- } else {
- pr_warn("siw: [QP %u]: fence resume: orq idx: %d:%d\n",
- qp_id(qp), qp->orq_get, qp->orq_put);
- rv = -EPROTO;
}
}
- qp->orq_get++;
out:
spin_unlock_irqrestore(&qp->orq_lock, flags);
@@ -1290,11 +1300,13 @@ static int siw_rdmap_complete(struct siw_qp *qp, int error)
wc_status);
siw_wqe_put_mem(wqe, SIW_OP_READ);
- if (!error)
+ if (!error) {
rv = siw_check_tx_fence(qp);
- else
- /* Disable current ORQ eleement */
- WRITE_ONCE(orq_get_current(qp)->flags, 0);
+ } else {
+ /* Disable current ORQ element */
+ if (qp->attrs.orq_size)
+ WRITE_ONCE(orq_get_current(qp)->flags, 0);
+ }
break;
case RDMAP_RDMA_READ_REQ:
diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c
index e7cd04eda04a..424918eb1cd4 100644
--- a/drivers/infiniband/sw/siw/siw_qp_tx.c
+++ b/drivers/infiniband/sw/siw/siw_qp_tx.c
@@ -1107,8 +1107,8 @@ next_wqe:
/*
* RREQ may have already been completed by inbound RRESP!
*/
- if (tx_type == SIW_OP_READ ||
- tx_type == SIW_OP_READ_LOCAL_INV) {
+ if ((tx_type == SIW_OP_READ ||
+ tx_type == SIW_OP_READ_LOCAL_INV) && qp->attrs.orq_size) {
/* Cleanup pending entry in ORQ */
qp->orq_put--;
qp->orq[qp->orq_put % qp->attrs.orq_size].flags = 0;
diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
index 1b1a40db529c..b9ca54e372b4 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -314,7 +314,6 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd,
struct siw_ucontext *uctx =
rdma_udata_to_drv_context(udata, struct siw_ucontext,
base_ucontext);
- struct siw_cq *scq = NULL, *rcq = NULL;
unsigned long flags;
int num_sqe, num_rqe, rv = 0;
@@ -353,10 +352,8 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd,
rv = -EINVAL;
goto err_out;
}
- scq = to_siw_cq(attrs->send_cq);
- rcq = to_siw_cq(attrs->recv_cq);
- if (!scq || (!rcq && !attrs->srq)) {
+ if (!attrs->send_cq || (!attrs->recv_cq && !attrs->srq)) {
siw_dbg(base_dev, "send CQ or receive CQ invalid\n");
rv = -EINVAL;
goto err_out;
@@ -387,13 +384,23 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd,
if (rv)
goto err_out;
+ num_sqe = attrs->cap.max_send_wr;
+ num_rqe = attrs->cap.max_recv_wr;
+
/* All queue indices are derived from modulo operations
* on a free running 'get' (consumer) and 'put' (producer)
* unsigned counter. Having queue sizes at power of two
* avoids handling counter wrap around.
*/
- num_sqe = roundup_pow_of_two(attrs->cap.max_send_wr);
- num_rqe = roundup_pow_of_two(attrs->cap.max_recv_wr);
+ if (num_sqe)
+ num_sqe = roundup_pow_of_two(num_sqe);
+ else {
+ /* Zero sized SQ is not supported */
+ rv = -EINVAL;
+ goto err_out_xa;
+ }
+ if (num_rqe)
+ num_rqe = roundup_pow_of_two(num_rqe);
if (qp->kernel_verbs)
qp->sendq = vzalloc(num_sqe * sizeof(struct siw_sqe));
@@ -401,7 +408,6 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd,
qp->sendq = vmalloc_user(num_sqe * sizeof(struct siw_sqe));
if (qp->sendq == NULL) {
- siw_dbg(base_dev, "SQ size %d alloc failed\n", num_sqe);
rv = -ENOMEM;
goto err_out_xa;
}
@@ -414,8 +420,8 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd,
}
}
qp->pd = pd;
- qp->scq = scq;
- qp->rcq = rcq;
+ qp->scq = to_siw_cq(attrs->send_cq);
+ qp->rcq = to_siw_cq(attrs->recv_cq);
if (attrs->srq) {
/*
@@ -434,7 +440,6 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd,
vmalloc_user(num_rqe * sizeof(struct siw_rqe));
if (qp->recvq == NULL) {
- siw_dbg(base_dev, "RQ size %d alloc failed\n", num_rqe);
rv = -ENOMEM;
goto err_out_xa;
}
@@ -982,9 +987,9 @@ int siw_post_receive(struct ib_qp *base_qp, const struct ib_recv_wr *wr,
unsigned long flags;
int rv = 0;
- if (qp->srq) {
+ if (qp->srq || qp->attrs.rq_size == 0) {
*bad_wr = wr;
- return -EOPNOTSUPP; /* what else from errno.h? */
+ return -EINVAL;
}
if (!qp->kernel_verbs) {
siw_dbg_qp(qp, "no kernel post_recv for user mapped sq\n");