Send and Receive completion is handled on a single CPU selected at
the time each Completion Queue is allocated. Typically this is when
an initiator instantiates an RDMA transport, or when a target
accepts an RDMA connection.

Some ULPs cannot open a connection per CPU to spread completion
workload across available CPUs. For these ULPs, allow the RDMA core
to select a completion vector based on the device's complement of
available comp_vecs.

When a ULP elects to use RDMA_CORE_ANY_COMPVEC, if multiple CPUs are
available, a different CPU will be selected for each Completion
Queue. For the moment, a simple round-robin mechanism is used. The
round-robin counter is reduced in unsigned arithmetic so that the
selected vector stays within [0, num_comp_vectors) even after the
counter wraps around.

Suggested-by: Håkon Bugge <haakon.bugge@xxxxxxxxxx>
Signed-off-by: Chuck Lever <chuck.lever@xxxxxxxxxx>
---
 drivers/infiniband/core/cq.c             | 20 +++++++++++++++++++-
 include/rdma/ib_verbs.h                  |  3 +++
 net/sunrpc/xprtrdma/svc_rdma_transport.c |  6 ++++--
 net/sunrpc/xprtrdma/verbs.c              |  5 ++---
 4 files changed, 28 insertions(+), 6 deletions(-)

Jason-

If this patch is acceptable to all, then I would expect you to take
it through the RDMA tree.

diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c
index 7c599878ccf7..a89d549490c4 100644
--- a/drivers/infiniband/core/cq.c
+++ b/drivers/infiniband/core/cq.c
@@ -165,12 +165,27 @@ static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
 	queue_work(cq->comp_wq, &cq->work);
 }
 
+/*
+ * Attempt to spread ULP completion queues over a device's completion
+ * vectors so that all available CPU cores can help service the device's
+ * interrupt workload. This mechanism may be improved at a later point
+ * to dynamically take into account the system's actual workload.
+ */
+static int ib_get_comp_vector(struct ib_device *dev)
+{
+	static atomic_t cv;
+
+	if (dev->num_comp_vectors > 1)
+		return (u32)atomic_inc_return(&cv) % dev->num_comp_vectors;
+	return 0;
+}
+
 /**
  * __ib_alloc_cq_user - allocate a completion queue
  * @dev:		device to allocate the CQ for
  * @private:		driver private data, accessible from cq->cq_context
  * @nr_cqe:		number of CQEs to allocate
- * @comp_vector:	HCA completion vectors for this CQ
+ * @comp_vector:	HCA completion vector for this CQ
  * @poll_ctx:		context to poll the CQ from.
  * @caller:		module owner name.
  * @udata:		Valid user data or NULL for kernel object
@@ -208,6 +223,9 @@ struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private,
 	cq->res.type = RDMA_RESTRACK_CQ;
 	rdma_restrack_set_task(&cq->res, caller);
 
+	if (comp_vector == RDMA_CORE_ANY_COMPVEC)
+		cq_attr.comp_vector = ib_get_comp_vector(dev);
+
 	ret = dev->ops.create_cq(cq, &cq_attr, NULL);
 	if (ret)
 		goto out_free_wc;
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index c5f8a9f17063..547d36bcef7e 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -3669,6 +3669,9 @@ static inline int ib_post_recv(struct ib_qp *qp,
 	return qp->device->ops.post_recv(qp, recv_wr, bad_recv_wr ? : &dummy);
 }
 
+/* Tell the RDMA core to select an appropriate comp_vector */
+#define RDMA_CORE_ANY_COMPVEC	((int)(-1))
+
 struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private,
 				 int nr_cqe, int comp_vector,
 				 enum ib_poll_context poll_ctx,
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 3fe665152d95..7df6de6e9162 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -455,13 +455,15 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 		goto errout;
 	}
 	newxprt->sc_sq_cq = ib_alloc_cq(dev, newxprt, newxprt->sc_sq_depth,
-					0, IB_POLL_WORKQUEUE);
+					RDMA_CORE_ANY_COMPVEC,
+					IB_POLL_WORKQUEUE);
 	if (IS_ERR(newxprt->sc_sq_cq)) {
 		dprintk("svcrdma: error creating SQ CQ for connect request\n");
 		goto errout;
 	}
 	newxprt->sc_rq_cq = ib_alloc_cq(dev, newxprt, rq_depth,
-					0, IB_POLL_WORKQUEUE);
+					RDMA_CORE_ANY_COMPVEC,
+					IB_POLL_WORKQUEUE);
 	if (IS_ERR(newxprt->sc_rq_cq)) {
 		dprintk("svcrdma: error creating RQ CQ for connect request\n");
 		goto errout;
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 805b1f35e1ca..6e5989e2b8ed 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -523,8 +523,7 @@ int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
 
 	sendcq = ib_alloc_cq(ia->ri_id->device, NULL,
 			     ep->rep_attr.cap.max_send_wr + 1,
-			     ia->ri_id->device->num_comp_vectors > 1 ? 1 : 0,
-			     IB_POLL_WORKQUEUE);
+			     RDMA_CORE_ANY_COMPVEC, IB_POLL_WORKQUEUE);
 	if (IS_ERR(sendcq)) {
 		rc = PTR_ERR(sendcq);
 		goto out1;
@@ -532,7 +531,7 @@ int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
 
 	recvcq = ib_alloc_cq(ia->ri_id->device, NULL,
 			     ep->rep_attr.cap.max_recv_wr + 1,
-			     0, IB_POLL_WORKQUEUE);
+			     RDMA_CORE_ANY_COMPVEC, IB_POLL_WORKQUEUE);
 	if (IS_ERR(recvcq)) {
 		rc = PTR_ERR(recvcq);
 		goto out2;