For FRWR, the computation of max_send_wr is split between frwr_op_open and rpcrdma_ep_create, which makes it difficult to tell that the max_send_wr result is currently incorrect if frwr_op_open has to reduce the credit limit to accommodate a small max_qp_wr. This is a problem now that extra WRs are needed for backchannel operations and a drain CQE. So, refactor the computation so that it is all done in ->ro_open, and fix the FRWR version of this computation so that it accommodates HCAs with small max_qp_wr correctly. Signed-off-by: Chuck Lever <chuck.lever@xxxxxxxxxx> --- net/sunrpc/xprtrdma/fmr_ops.c | 22 ++++++++++++++++++++++ net/sunrpc/xprtrdma/frwr_ops.c | 30 ++++++++++++++++++++++++++---- net/sunrpc/xprtrdma/verbs.c | 24 ++++-------------------- 3 files changed, 52 insertions(+), 24 deletions(-) diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c index f2f6395..0815f9e 100644 --- a/net/sunrpc/xprtrdma/fmr_ops.c +++ b/net/sunrpc/xprtrdma/fmr_ops.c @@ -156,10 +156,32 @@ enum { fmr_op_release_mr(mr); } +/* On success, sets: + * ep->rep_attr.cap.max_send_wr + * ep->rep_attr.cap.max_recv_wr + * cdata->max_requests + * ia->ri_max_segs + */ static int fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, struct rpcrdma_create_data_internal *cdata) { + int max_qp_wr; + + max_qp_wr = ia->ri_device->attrs.max_qp_wr; + max_qp_wr -= RPCRDMA_BACKWARD_WRS; + max_qp_wr -= 1; + if (max_qp_wr < RPCRDMA_MIN_SLOT_TABLE) + return -ENOMEM; + if (cdata->max_requests > max_qp_wr) + cdata->max_requests = max_qp_wr; + ep->rep_attr.cap.max_send_wr = cdata->max_requests; + ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS; + ep->rep_attr.cap.max_send_wr += 1; /* for ib_drain_sq */ + ep->rep_attr.cap.max_recv_wr = cdata->max_requests; + ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS; + ep->rep_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */ + ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS / RPCRDMA_MAX_FMR_SGES); return 0; diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index c59c5c7..cf5095d6 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -202,12 +202,22 @@ frwr_op_release_mr(mr); } +/* On success, sets: + * ep->rep_attr.cap.max_send_wr + * ep->rep_attr.cap.max_recv_wr + * cdata->max_requests + * ia->ri_max_segs + * + * And these FRWR-related fields: + * ia->ri_max_frwr_depth + * ia->ri_mrtype + */ static int frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, struct rpcrdma_create_data_internal *cdata) { struct ib_device_attr *attrs = &ia->ri_device->attrs; - int depth, delta; + int max_qp_wr, depth, delta; ia->ri_mrtype = IB_MR_TYPE_MEM_REG; if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG) @@ -241,14 +251,26 @@ } while (delta > 0); } - ep->rep_attr.cap.max_send_wr *= depth; - if (ep->rep_attr.cap.max_send_wr > attrs->max_qp_wr) { - cdata->max_requests = attrs->max_qp_wr / depth; + max_qp_wr = ia->ri_device->attrs.max_qp_wr; + max_qp_wr -= RPCRDMA_BACKWARD_WRS; + max_qp_wr -= 1; + if (max_qp_wr < RPCRDMA_MIN_SLOT_TABLE) + return -ENOMEM; + if (cdata->max_requests > max_qp_wr) + cdata->max_requests = max_qp_wr; + ep->rep_attr.cap.max_send_wr = cdata->max_requests * depth; + if (ep->rep_attr.cap.max_send_wr > max_qp_wr) { + cdata->max_requests = max_qp_wr / depth; if (!cdata->max_requests) return -EINVAL; ep->rep_attr.cap.max_send_wr = cdata->max_requests * depth; } + ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS; + ep->rep_attr.cap.max_send_wr += 1; /* for ib_drain_sq */ + ep->rep_attr.cap.max_recv_wr = cdata->max_requests; + ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS; + ep->rep_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */ ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS / ia->ri_max_frwr_depth); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 817a692..581d0ae 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -501,8 +501,8 @@ struct rpcrdma_create_data_internal *cdata) { struct rpcrdma_connect_private *pmsg = &ep->rep_cm_private; - unsigned int max_qp_wr, max_sge; struct ib_cq *sendcq, *recvcq; + unsigned int max_sge; int rc; max_sge = min_t(unsigned int, ia->ri_device->attrs.max_sge, @@ -513,29 +513,13 @@ } ia->ri_max_send_sges = max_sge; - if (ia->ri_device->attrs.max_qp_wr <= RPCRDMA_BACKWARD_WRS) { - dprintk("RPC: %s: insufficient wqe's available\n", - __func__); - return -ENOMEM; - } - max_qp_wr = ia->ri_device->attrs.max_qp_wr - RPCRDMA_BACKWARD_WRS - 1; - - /* check provider's send/recv wr limits */ - if (cdata->max_requests > max_qp_wr) - cdata->max_requests = max_qp_wr; + rc = ia->ri_ops->ro_open(ia, ep, cdata); + if (rc) + return rc; ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall; ep->rep_attr.qp_context = ep; ep->rep_attr.srq = NULL; - ep->rep_attr.cap.max_send_wr = cdata->max_requests; - ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS; - ep->rep_attr.cap.max_send_wr += 1; /* drain cqe */ - rc = ia->ri_ops->ro_open(ia, ep, cdata); - if (rc) - return rc; - ep->rep_attr.cap.max_recv_wr = cdata->max_requests; - ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS; - ep->rep_attr.cap.max_recv_wr += 1; /* drain cqe */ ep->rep_attr.cap.max_send_sge = max_sge; ep->rep_attr.cap.max_recv_sge = 1; ep->rep_attr.cap.max_inline_data = 0; -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html