On Sun, Jul 28, 2019 at 12:30:27PM -0400, Chuck Lever wrote: > Send and Receive completion is handled on a single CPU selected at > the time each Completion Queue is allocated. Typically this is when > an initiator instantiates an RDMA transport, or when a target > accepts an RDMA connection. > > Some ULPs cannot open a connection per CPU to spread completion > workload across available CPUs and MSI vectors. For such ULPs, > provide an API that allows the RDMA core to select a completion > vector based on the device's complement of available comp_vecs. > > ULPs that invoke ib_alloc_cq() with only comp_vector 0 are converted > to use the new API so that their completion workloads interfere less > with each other. > > Suggested-by: Håkon Bugge <haakon.bugge@xxxxxxxxxx> > Signed-off-by: Chuck Lever <chuck.lever@xxxxxxxxxx> > Cc: <linux-cifs@xxxxxxxxxxxxxxx> > Cc: <v9fs-developer@xxxxxxxxxxxxxxxxxxxxx> > --- > drivers/infiniband/core/cq.c | 29 +++++++++++++++++++++++++++++ > drivers/infiniband/ulp/srpt/ib_srpt.c | 4 ++-- > fs/cifs/smbdirect.c | 10 ++++++---- > include/rdma/ib_verbs.h | 19 +++++++++++++++++++ > net/9p/trans_rdma.c | 6 +++--- > net/sunrpc/xprtrdma/svc_rdma_transport.c | 8 ++++---- > net/sunrpc/xprtrdma/verbs.c | 13 ++++++------- > 7 files changed, 69 insertions(+), 20 deletions(-) > > diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c > index 7c59987..ea3bb0d 100644 > --- a/drivers/infiniband/core/cq.c > +++ b/drivers/infiniband/core/cq.c > @@ -253,6 +253,35 @@ struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private, > EXPORT_SYMBOL(__ib_alloc_cq_user); > > /** > + * __ib_alloc_cq_any - allocate a completion queue > + * @dev: device to allocate the CQ for > + * @private: driver private data, accessible from cq->cq_context > + * @nr_cqe: number of CQEs to allocate > + * @poll_ctx: context to poll the CQ from. > + * @caller: module owner name. 
> + * > + * Attempt to spread ULP Completion Queues over each device's interrupt > + * vectors. > + */ > +struct ib_cq *__ib_alloc_cq_any(struct ib_device *dev, void *private, > + int nr_cqe, enum ib_poll_context poll_ctx, > + const char *caller) > +{ > + static atomic_t counter; > + int comp_vector; int comp_vector = 0; > + > + comp_vector = 0; This assignment would be better done as part of the initialization. > + if (dev->num_comp_vectors > 1) > + comp_vector = > + atomic_inc_return(&counter) % Don't we need to manage a "free list" of comp_vectors? Otherwise we can find ourselves providing an already "taken" comp_vector. Just as an example: dev->num_comp_vectors = 2 num_online_cpus = 4 The following combination will give us the same comp_vector: 1. ib_alloc_cq_any() 2. ib_alloc_cq_any() 3. ib_free_cq() 4. goto 2 > + min_t(int, dev->num_comp_vectors, num_online_cpus()); > + > + return __ib_alloc_cq_user(dev, private, nr_cqe, comp_vector, poll_ctx, > + caller, NULL); > +} > +EXPORT_SYMBOL(__ib_alloc_cq_any); > + > +/** > * ib_free_cq_user - free a completion queue > * @cq: completion queue to free. 
> * @udata: User data or NULL for kernel object > diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c > index 1a039f1..e25c70a 100644 > --- a/drivers/infiniband/ulp/srpt/ib_srpt.c > +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c > @@ -1767,8 +1767,8 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch) > goto out; > > retry: > - ch->cq = ib_alloc_cq(sdev->device, ch, ch->rq_size + sq_size, > - 0 /* XXX: spread CQs */, IB_POLL_WORKQUEUE); > + ch->cq = ib_alloc_cq_any(sdev->device, ch, ch->rq_size + sq_size, > + IB_POLL_WORKQUEUE); > if (IS_ERR(ch->cq)) { > ret = PTR_ERR(ch->cq); > pr_err("failed to create CQ cqe= %d ret= %d\n", > diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c > index cd07e53..3c91fa9 100644 > --- a/fs/cifs/smbdirect.c > +++ b/fs/cifs/smbdirect.c > @@ -1654,15 +1654,17 @@ static struct smbd_connection *_smbd_get_connection( > > info->send_cq = NULL; > info->recv_cq = NULL; > - info->send_cq = ib_alloc_cq(info->id->device, info, > - info->send_credit_target, 0, IB_POLL_SOFTIRQ); > + info->send_cq = > + ib_alloc_cq_any(info->id->device, info, > + info->send_credit_target, IB_POLL_SOFTIRQ); > if (IS_ERR(info->send_cq)) { > info->send_cq = NULL; > goto alloc_cq_failed; > } > > - info->recv_cq = ib_alloc_cq(info->id->device, info, > - info->receive_credit_max, 0, IB_POLL_SOFTIRQ); > + info->recv_cq = > + ib_alloc_cq_any(info->id->device, info, > + info->receive_credit_max, IB_POLL_SOFTIRQ); > if (IS_ERR(info->recv_cq)) { > info->recv_cq = NULL; > goto alloc_cq_failed; > diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h > index c5f8a9f..2a1523cc 100644 > --- a/include/rdma/ib_verbs.h > +++ b/include/rdma/ib_verbs.h > @@ -3711,6 +3711,25 @@ static inline struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private, > NULL); > } > > +struct ib_cq *__ib_alloc_cq_any(struct ib_device *dev, void *private, > + int nr_cqe, enum ib_poll_context poll_ctx, > + const char *caller); > + > +/** > + * 
ib_alloc_cq_any: Allocate kernel CQ > + * @dev: The IB device > + * @private: Private data attached to the CQE > + * @nr_cqe: Number of CQEs in the CQ > + * @poll_ctx: Context used for polling the CQ > + */ > +static inline struct ib_cq *ib_alloc_cq_any(struct ib_device *dev, > + void *private, int nr_cqe, > + enum ib_poll_context poll_ctx) > +{ > + return __ib_alloc_cq_any(dev, private, nr_cqe, poll_ctx, > + KBUILD_MODNAME); > +} This should be a macro and not an inline function, because the compiler can be instructed not to inline functions (there is a kconfig option for that), and that would cause the wrong KBUILD_MODNAME to be inserted (ib_core instead of the ULP). And yes, commit c4367a26357b ("IB: Pass uverbs_attr_bundle down ib_x destroy path") did it completely wrong. > + > /** > * ib_free_cq_user - Free kernel/user CQ > * @cq: The CQ to free > diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c > index bac8dad..b21c3c2 100644 > --- a/net/9p/trans_rdma.c > +++ b/net/9p/trans_rdma.c > @@ -685,9 +685,9 @@ static int p9_rdma_bind_privport(struct p9_trans_rdma *rdma) > goto error; > > /* Create the Completion Queue */ > - rdma->cq = ib_alloc_cq(rdma->cm_id->device, client, > - opts.sq_depth + opts.rq_depth + 1, > - 0, IB_POLL_SOFTIRQ); > + rdma->cq = ib_alloc_cq_any(rdma->cm_id->device, client, > + opts.sq_depth + opts.rq_depth + 1, > + IB_POLL_SOFTIRQ); > if (IS_ERR(rdma->cq)) > goto error; > > diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c > index 3fe6651..4d3db6e 100644 > --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c > +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c > @@ -454,14 +454,14 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) > dprintk("svcrdma: error creating PD for connect request\n"); > goto errout; > } > - newxprt->sc_sq_cq = ib_alloc_cq(dev, newxprt, newxprt->sc_sq_depth, > - 0, IB_POLL_WORKQUEUE); > + newxprt->sc_sq_cq = ib_alloc_cq_any(dev, newxprt, newxprt->sc_sq_depth, > + 
IB_POLL_WORKQUEUE); > if (IS_ERR(newxprt->sc_sq_cq)) { > dprintk("svcrdma: error creating SQ CQ for connect request\n"); > goto errout; > } > - newxprt->sc_rq_cq = ib_alloc_cq(dev, newxprt, rq_depth, > - 0, IB_POLL_WORKQUEUE); > + newxprt->sc_rq_cq = > + ib_alloc_cq_any(dev, newxprt, rq_depth, IB_POLL_WORKQUEUE); > if (IS_ERR(newxprt->sc_rq_cq)) { > dprintk("svcrdma: error creating RQ CQ for connect request\n"); > goto errout; > diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c > index 805b1f35..b10aa16 100644 > --- a/net/sunrpc/xprtrdma/verbs.c > +++ b/net/sunrpc/xprtrdma/verbs.c > @@ -521,18 +521,17 @@ int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) > init_waitqueue_head(&ep->rep_connect_wait); > ep->rep_receive_count = 0; > > - sendcq = ib_alloc_cq(ia->ri_id->device, NULL, > - ep->rep_attr.cap.max_send_wr + 1, > - ia->ri_id->device->num_comp_vectors > 1 ? 1 : 0, > - IB_POLL_WORKQUEUE); > + sendcq = ib_alloc_cq_any(ia->ri_id->device, NULL, > + ep->rep_attr.cap.max_send_wr + 1, > + IB_POLL_WORKQUEUE); > if (IS_ERR(sendcq)) { > rc = PTR_ERR(sendcq); > goto out1; > } > > - recvcq = ib_alloc_cq(ia->ri_id->device, NULL, > - ep->rep_attr.cap.max_recv_wr + 1, > - 0, IB_POLL_WORKQUEUE); > + recvcq = ib_alloc_cq_any(ia->ri_id->device, NULL, > + ep->rep_attr.cap.max_recv_wr + 1, > + IB_POLL_WORKQUEUE); > if (IS_ERR(recvcq)) { > rc = PTR_ERR(recvcq); > goto out2; >