summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Chuck Lever <chuck.lever@oracle.com> 2017-02-08 17:00:10 -0500
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2017-03-12 06:41:52 +0100
commit 86840a6305149ce9b466f3a841cffdb3c0fd2608 (patch)
tree df541dcd74d5796f01b6a332f75efa264ff0ec20
parent 73eea1c4000fd73c138dbf8826bc6e1fa901ae9b (diff)
xprtrdma: Reduce required number of send SGEs
commit 16f906d66cd76fb9895cbc628f447532a7ac1faa upstream.

The MAX_SEND_SGES check introduced in commit 655fec6987be ("xprtrdma: Use gathered Send for large inline messages") fails for devices that have a small max_sge.

Instead of checking for a large fixed maximum number of SGEs, check for a minimum small number. RPC-over-RDMA will switch to using a Read chunk if an xdr_buf has more pages than can fit in the device's max_sge limit. This is considerably better than failing altogether to mount the server.

This fix supports devices that have as few as three send SGEs available.

Reported-by: Selvin Xavier <selvin.xavier@broadcom.com>
Reported-by: Devesh Sharma <devesh.sharma@broadcom.com>
Reported-by: Honggang Li <honli@redhat.com>
Reported-by: Ram Amrani <Ram.Amrani@cavium.com>
Fixes: 655fec6987be ("xprtrdma: Use gathered Send for large ...")
Tested-by: Honggang Li <honli@redhat.com>
Tested-by: Ram Amrani <Ram.Amrani@cavium.com>
Tested-by: Steve Wise <swise@opengridcomputing.com>
Reviewed-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c26
-rw-r--r--net/sunrpc/xprtrdma/verbs.c13
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h2
3 files changed, 32 insertions, 9 deletions
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index b8f46c186f02..f57c9f0ab8f9 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -125,14 +125,34 @@ void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *r_xprt)
/* The client can send a request inline as long as the RPCRDMA header
* plus the RPC call fit under the transport's inline limit. If the
* combined call message size exceeds that limit, the client must use
- * the read chunk list for this operation.
+ * a Read chunk for this operation.
+ *
+ * A Read chunk is also required if sending the RPC call inline would
+ * exceed this device's max_sge limit.
*/
static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt,
struct rpc_rqst *rqst)
{
- struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+ struct xdr_buf *xdr = &rqst->rq_snd_buf;
+ unsigned int count, remaining, offset;
+
+ if (xdr->len > r_xprt->rx_ia.ri_max_inline_write)
+ return false;
- return rqst->rq_snd_buf.len <= ia->ri_max_inline_write;
+ if (xdr->page_len) {
+ remaining = xdr->page_len;
+ offset = xdr->page_base & ~PAGE_MASK;
+ count = 0;
+ while (remaining) {
+ remaining -= min_t(unsigned int,
+ PAGE_SIZE - offset, remaining);
+ offset = 0;
+ if (++count > r_xprt->rx_ia.ri_max_send_sges)
+ return false;
+ }
+ }
+
+ return true;
}
/* The client can't know how large the actual reply will be. Thus it
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 4d5d5b1c98c4..e2c37061edbe 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -479,18 +479,19 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia)
*/
int
rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
- struct rpcrdma_create_data_internal *cdata)
+ struct rpcrdma_create_data_internal *cdata)
{
struct rpcrdma_connect_private *pmsg = &ep->rep_cm_private;
+ unsigned int max_qp_wr, max_sge;
struct ib_cq *sendcq, *recvcq;
- unsigned int max_qp_wr;
int rc;
- if (ia->ri_device->attrs.max_sge < RPCRDMA_MAX_SEND_SGES) {
- dprintk("RPC: %s: insufficient sge's available\n",
- __func__);
+ max_sge = min(ia->ri_device->attrs.max_sge, RPCRDMA_MAX_SEND_SGES);
+ if (max_sge < RPCRDMA_MIN_SEND_SGES) {
+ pr_warn("rpcrdma: HCA provides only %d send SGEs\n", max_sge);
return -ENOMEM;
}
+ ia->ri_max_send_sges = max_sge - RPCRDMA_MIN_SEND_SGES;
if (ia->ri_device->attrs.max_qp_wr <= RPCRDMA_BACKWARD_WRS) {
dprintk("RPC: %s: insufficient wqe's available\n",
@@ -515,7 +516,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
ep->rep_attr.cap.max_recv_wr += 1; /* drain cqe */
- ep->rep_attr.cap.max_send_sge = RPCRDMA_MAX_SEND_SGES;
+ ep->rep_attr.cap.max_send_sge = max_sge;
ep->rep_attr.cap.max_recv_sge = 1;
ep->rep_attr.cap.max_inline_data = 0;
ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 07db2d1e072d..48989d5b2883 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -74,6 +74,7 @@ struct rpcrdma_ia {
unsigned int ri_max_frmr_depth;
unsigned int ri_max_inline_write;
unsigned int ri_max_inline_read;
+ unsigned int ri_max_send_sges;
bool ri_reminv_expected;
bool ri_implicit_roundup;
struct ib_qp_attr ri_qp_attr;
@@ -310,6 +311,7 @@ struct rpcrdma_mr_seg { /* chunk descriptors */
* - xdr_buf tail iovec
*/
enum {
+ RPCRDMA_MIN_SEND_SGES = 3,
RPCRDMA_MAX_SEND_PAGES = PAGE_SIZE + RPCRDMA_MAX_INLINE - 1,
RPCRDMA_MAX_PAGE_SGES = (RPCRDMA_MAX_SEND_PAGES >> PAGE_SHIFT) + 1,
RPCRDMA_MAX_SEND_SGES = 1 + 1 + RPCRDMA_MAX_PAGE_SGES + 1,