Send and Receive completion is handled on a single CPU selected at the time each Completion Queue is allocated. Typically this is when an initiator instantiates an RDMA transport, or when a target accepts an RDMA connection. Some ULPs cannot open a connection per CPU to spread completion workload across available CPUs and MSI vectors. For such ULPs, provide an API that allows the RDMA core to select a completion vector based on the device's complement of available comp_vecs. ULPs that invoke ib_alloc_cq() with only comp_vector 0 are converted to use the new API so that their completion workloads interfere less with each other. Suggested-by: Håkon Bugge <haakon.bugge@xxxxxxxxxx> Signed-off-by: Chuck Lever <chuck.lever@xxxxxxxxxx> Reviewed-by: Leon Romanovsky <leonro@xxxxxxxxxxxx> Cc: <linux-cifs@xxxxxxxxxxxxxxx> Cc: <v9fs-developer@xxxxxxxxxxxxxxxxxxxxx> --- net/sunrpc/xprtrdma/svc_rdma_transport.c | 8 ++++---- net/sunrpc/xprtrdma/verbs.c | 13 ++++++------- 2 files changed, 10 insertions(+), 11 deletions(-) Changes since v2: - Reviewed-by tag added - Fix initialization of comp_vector value - Clarify comments based on review diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c index 7c59987..bbfded6 100644 --- a/drivers/infiniband/core/cq.c +++ b/drivers/infiniband/core/cq.c @@ -253,6 +253,34 @@ struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private, EXPORT_SYMBOL(__ib_alloc_cq_user); /** + * __ib_alloc_cq_any - allocate a completion queue + * @dev: device to allocate the CQ for + * @private: driver private data, accessible from cq->cq_context + * @nr_cqe: number of CQEs to allocate + * @poll_ctx: context to poll the CQ from + * @caller: module owner name + * + * Attempt to spread ULP Completion Queues over each device's interrupt + * vectors. A simple best-effort mechanism is used. + */ +struct ib_cq *__ib_alloc_cq_any(struct ib_device *dev, void *private, + int nr_cqe, enum ib_poll_context poll_ctx, + const char *caller) +{ + static atomic_t counter; + int comp_vector = 0; + + if (dev->num_comp_vectors > 1) + comp_vector = + atomic_inc_return(&counter) % + min_t(int, dev->num_comp_vectors, num_online_cpus()); + + return __ib_alloc_cq_user(dev, private, nr_cqe, comp_vector, poll_ctx, + caller, NULL); +} +EXPORT_SYMBOL(__ib_alloc_cq_any); + +/** * ib_free_cq_user - free a completion queue * @cq: completion queue to free. * @udata: User data or NULL for kernel object diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 1a039f1..e25c70a 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1767,8 +1767,8 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch) goto out; retry: - ch->cq = ib_alloc_cq(sdev->device, ch, ch->rq_size + sq_size, - 0 /* XXX: spread CQs */, IB_POLL_WORKQUEUE); + ch->cq = ib_alloc_cq_any(sdev->device, ch, ch->rq_size + sq_size, + IB_POLL_WORKQUEUE); if (IS_ERR(ch->cq)) { ret = PTR_ERR(ch->cq); pr_err("failed to create CQ cqe= %d ret= %d\n", diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c index cd07e53..3c91fa9 100644 --- a/fs/cifs/smbdirect.c +++ b/fs/cifs/smbdirect.c @@ -1654,15 +1654,17 @@ static struct smbd_connection *_smbd_get_connection( info->send_cq = NULL; info->recv_cq = NULL; - info->send_cq = ib_alloc_cq(info->id->device, info, - info->send_credit_target, 0, IB_POLL_SOFTIRQ); + info->send_cq = + ib_alloc_cq_any(info->id->device, info, + info->send_credit_target, IB_POLL_SOFTIRQ); if (IS_ERR(info->send_cq)) { info->send_cq = NULL; goto alloc_cq_failed; } - info->recv_cq = ib_alloc_cq(info->id->device, info, - info->receive_credit_max, 0, IB_POLL_SOFTIRQ); + info->recv_cq = + ib_alloc_cq_any(info->id->device, info, + info->receive_credit_max, IB_POLL_SOFTIRQ); if (IS_ERR(info->recv_cq)) { info->recv_cq = NULL; goto alloc_cq_failed; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index c5f8a9f..2a1523cc 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -3711,6 +3711,25 @@ static inline struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private, NULL); } +struct ib_cq *__ib_alloc_cq_any(struct ib_device *dev, void *private, + int nr_cqe, enum ib_poll_context poll_ctx, + const char *caller); + +/** + * ib_alloc_cq_any: Allocate kernel CQ + * @dev: The IB device + * @private: Private data attached to the CQE + * @nr_cqe: Number of CQEs in the CQ + * @poll_ctx: Context used for polling the CQ + */ +static inline struct ib_cq *ib_alloc_cq_any(struct ib_device *dev, + void *private, int nr_cqe, + enum ib_poll_context poll_ctx) +{ + return __ib_alloc_cq_any(dev, private, nr_cqe, poll_ctx, + KBUILD_MODNAME); +} + /** * ib_free_cq_user - Free kernel/user CQ * @cq: The CQ to free diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index bac8dad..b21c3c2 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -685,9 +685,9 @@ static int p9_rdma_bind_privport(struct p9_trans_rdma *rdma) goto error; /* Create the Completion Queue */ - rdma->cq = ib_alloc_cq(rdma->cm_id->device, client, - opts.sq_depth + opts.rq_depth + 1, - 0, IB_POLL_SOFTIRQ); + rdma->cq = ib_alloc_cq_any(rdma->cm_id->device, client, + opts.sq_depth + opts.rq_depth + 1, + IB_POLL_SOFTIRQ); if (IS_ERR(rdma->cq)) goto error; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 3fe6651..4d3db6e 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -454,14 +454,14 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) dprintk("svcrdma: error creating PD for connect request\n"); goto errout; } - newxprt->sc_sq_cq = ib_alloc_cq(dev, newxprt, newxprt->sc_sq_depth, - 0, IB_POLL_WORKQUEUE); + newxprt->sc_sq_cq = ib_alloc_cq_any(dev, newxprt, newxprt->sc_sq_depth, + IB_POLL_WORKQUEUE); if (IS_ERR(newxprt->sc_sq_cq)) { dprintk("svcrdma: error creating SQ CQ for connect request\n"); goto errout; } - newxprt->sc_rq_cq = ib_alloc_cq(dev, newxprt, rq_depth, - 0, IB_POLL_WORKQUEUE); + newxprt->sc_rq_cq = + ib_alloc_cq_any(dev, newxprt, rq_depth, IB_POLL_WORKQUEUE); if (IS_ERR(newxprt->sc_rq_cq)) { dprintk("svcrdma: error creating RQ CQ for connect request\n"); goto errout; diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 805b1f35..b10aa16 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -521,18 +521,17 @@ int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) init_waitqueue_head(&ep->rep_connect_wait); ep->rep_receive_count = 0; - sendcq = ib_alloc_cq(ia->ri_id->device, NULL, - ep->rep_attr.cap.max_send_wr + 1, - ia->ri_id->device->num_comp_vectors > 1 ? 1 : 0, - IB_POLL_WORKQUEUE); + sendcq = ib_alloc_cq_any(ia->ri_id->device, NULL, + ep->rep_attr.cap.max_send_wr + 1, + IB_POLL_WORKQUEUE); if (IS_ERR(sendcq)) { rc = PTR_ERR(sendcq); goto out1; } - recvcq = ib_alloc_cq(ia->ri_id->device, NULL, - ep->rep_attr.cap.max_recv_wr + 1, - 0, IB_POLL_WORKQUEUE); + recvcq = ib_alloc_cq_any(ia->ri_id->device, NULL, + ep->rep_attr.cap.max_recv_wr + 1, + IB_POLL_WORKQUEUE); if (IS_ERR(recvcq)) { rc = PTR_ERR(recvcq); goto out2;