Re: [PATCH rdma-next 2/3] RDMA: Handle PD allocations by IB/core

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, Jan 28, 2019 at 06:12:58PM +0000, Jason Gunthorpe wrote:
> On Sun, Jan 27, 2019 at 08:56:08PM +0200, Leon Romanovsky wrote:
>
> > 41 files changed, 270 insertions(+), 435 deletions(-)
>
> Wow!

You can extrapolate how much code we will remove once we will convert
all other objects too.

>
> > -	pd = ib_dev->ops.alloc_pd(ib_dev, uobj->context, &attrs->driver_udata);
> > -	if (IS_ERR(pd)) {
> > -		ret = PTR_ERR(pd);
> > +	pd = kzalloc(RDMA_DRIVER_SIZE(ib_dev, ib_pd), GFP_KERNEL);
> > +	if (!pd) {
> > +		ret = -ENOMEM;
>
> If we are going to add macros, I think this is better written as
>
> #define rdma_zalloc_drv_obj(ib_dev, ib_type)                                   \
> 	((struct obj *)kzalloc(ib_dev->ops->size##ib_type, GFP_KERNEL))
>
> pd = rdma_zalloc_drv_obj(ib_dev, ib_pd);
>
> All drivers should be able to rely on zero initialized objects, and
> this continues to propogate the type safey since we have it..

ok, I'll drop RDMA_DRIVER_SIZE() macro.

>
>
> >  		goto err;
> >  	}
> >
> > @@ -413,11 +413,15 @@ static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs)
> >  	pd->uobject = uobj;
> >  	pd->__internal_mr = NULL;
> >  	atomic_set(&pd->usecnt, 0);
> > +	rdma_rt_set_type(&pd->res, RDMA_RESTRACK_PD);
> > +
> > +	ret = ib_dev->ops.alloc_pd(pd, uobj->context, &attrs->driver_udata);
> > +	if (ret)
> > +		goto err_alloc;
> >
> >  	uobj->object = pd;
> >  	memset(&resp, 0, sizeof resp);
> >  	resp.pd_handle = uobj->id;
> > -	rdma_rt_set_type(&pd->res, RDMA_RESTRACK_PD);
> >  	rdma_restrack_uadd(&pd->res);
> >
> >  	ret = uverbs_response(attrs, &resp, sizeof(resp));
> > @@ -428,7 +432,8 @@ static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs)
> >
> >  err_copy:
> >  	ib_dealloc_pd(pd);
> > -
> > +err_alloc:
> > +	kfree(pd);
> >  err:
> >  	uobj_alloc_abort(uobj);
> >  	return ret;
> > diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c
> > index cbc72312eb41..f224cb727224 100644
> > +++ b/drivers/infiniband/core/uverbs_std_types.c
> > @@ -188,7 +188,7 @@ static int uverbs_free_pd(struct ib_uobject *uobject,
> >  	if (ret)
> >  		return ret;
> >
> > -	ib_dealloc_pd((struct ib_pd *)uobject->object);
> > +	ib_dealloc_pd(pd);
> >  	return 0;
> >  }
> >
> > diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
> > index 814cbc814cf4..6acede55c9e8 100644
> > +++ b/drivers/infiniband/core/verbs.c
> > @@ -254,10 +254,11 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
> >  {
> >  	struct ib_pd *pd;
> >  	int mr_access_flags = 0;
> > +	int ret;
> >
> > -	pd = device->ops.alloc_pd(device, NULL, NULL);
> > -	if (IS_ERR(pd))
> > -		return pd;
> > +	pd = kzalloc(RDMA_DRIVER_SIZE(device, ib_pd), GFP_KERNEL);
> > +	if (!pd)
> > +		return ERR_PTR(-ENOMEM);
>
> Doesn't this miss

This pd will be released inside of ib_dealloc_pd(pd), because we
successfully called to driver's alloc_pd and need clean it with
driver's dealloc_pd.

>
> 		mr = pd->device->ops.get_dma_mr(pd, mr_access_flags);
> 		if (IS_ERR(mr)) {
> 			ib_dealloc_pd(pd);
> 			return ERR_CAST(mr);
> 		}
>
> ?
>
> >  	pd->device = device;
> >  	pd->uobject = NULL;
> > @@ -265,6 +266,16 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
> >  	atomic_set(&pd->usecnt, 0);
> >  	pd->flags = flags;
> >
> > +	rdma_rt_set_type(&pd->res, RDMA_RESTRACK_PD);
> > +	rdma_restrack_set_task(&pd->res, caller);
> > +
> > +	ret = device->ops.alloc_pd(pd, NULL, NULL);
> > +	if (ret) {
> > +		kfree(pd);
> > +		return ERR_PTR(ret);
> > +	}
> > +	rdma_restrack_kadd(&pd->res);
> > +
> >  	if (device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
> >  		pd->local_dma_lkey = device->local_dma_lkey;
> >  	else
> > @@ -275,10 +286,6 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
> >  		mr_access_flags |= IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_WRITE;
> >  	}
> >
> > -	rdma_rt_set_type(&pd->res, RDMA_RESTRACK_PD);
> > -	rdma_restrack_set_task(&pd->res, caller);
> > -	rdma_restrack_kadd(&pd->res);
> > -
> >  	if (mr_access_flags) {
> >  		struct ib_mr *mr;
> >
> > @@ -329,10 +336,8 @@ void ib_dealloc_pd(struct ib_pd *pd)
> >  	WARN_ON(atomic_read(&pd->usecnt));
> >
> >  	rdma_restrack_del(&pd->res);
> > -	/* Making delalloc_pd a void return is a WIP, no driver should return
> > -	   an error here. */
> > -	ret = pd->device->ops.dealloc_pd(pd);
> > -	WARN_ONCE(ret, "Infiniband HW driver failed dealloc_pd");
> > +	pd->device->ops.dealloc_pd(pd);
> > +	kfree(pd);
> >  }
> >  EXPORT_SYMBOL(ib_dealloc_pd);
> >
> > diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
> > index 9bc637e49faa..e71fee5f42f4 100644
> > +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
> > @@ -563,41 +563,29 @@ static int bnxt_re_create_fence_mr(struct bnxt_re_pd *pd)
> >  }
> >
> >  /* Protection Domains */
> > -int bnxt_re_dealloc_pd(struct ib_pd *ib_pd)
> > +void bnxt_re_dealloc_pd(struct ib_pd *ib_pd)
> >  {
> >  	struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
> >  	struct bnxt_re_dev *rdev = pd->rdev;
> > -	int rc;
> >
> >  	bnxt_re_destroy_fence_mr(pd);
> >
> > -	if (pd->qplib_pd.id) {
> > -		rc = bnxt_qplib_dealloc_pd(&rdev->qplib_res,
> > -					   &rdev->qplib_res.pd_tbl,
> > -					   &pd->qplib_pd);
> > -		if (rc)
> > -			dev_err(rdev_to_dev(rdev), "Failed to deallocate HW PD");
>
> Should we be deleting these warnings? Does indicate a driver problem.

bnxt_qplib_dealloc_pd() already prints such warning inside.

>
> Maybe making destory_pd void should be a another patch?

I anyway need to touch dealloc_pd path to remove kfree(pd) from all
drivers, deleting return value makes perfect sense to do in the same
patch.

>
> > diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
> > index 16eecfa5882c..20e41521455f 100644
> > +++ b/drivers/infiniband/hw/bnxt_re/main.c
> > @@ -575,6 +575,7 @@ static const struct ib_device_ops bnxt_re_dev_ops = {
> >  	.alloc_hw_stats = bnxt_re_ib_alloc_hw_stats,
> >  	.alloc_mr = bnxt_re_alloc_mr,
> >  	.alloc_pd = bnxt_re_alloc_pd,
> > +	INIT_RDMA_DRIVER_SIZE(ib_pd, bnxt_re_pd, ib_pd),
> >  	.alloc_ucontext = bnxt_re_alloc_ucontext,
> >  	.create_ah = bnxt_re_create_ah,
> >  	.create_cq = bnxt_re_create_cq,
>
> I think we should keep these sorted at the end of the struct. Putting
> them here makes keep-sorted harder.

No problem

>
> > @@ -742,12 +743,16 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
> >  	free_mr->mr_free_cq->ib_cq.cq_context		= NULL;
> >  	atomic_set(&free_mr->mr_free_cq->ib_cq.usecnt, 0);
> >
> > -	pd = hns_roce_alloc_pd(&hr_dev->ib_dev, NULL, NULL);
> > -	if (IS_ERR(pd)) {
> > -		dev_err(dev, "Create pd for reserved loop qp failed!");
> > -		ret = -ENOMEM;
> > +	ibdev = &hr_dev->ib_dev;
> > +	pd = kzalloc(RDMA_DRIVER_SIZE(ibdev, ib_pd), GFP_KERNEL);
> > +	if (pd)
> > +		goto alloc_mem_failed;
> > +
> > +	pd->device  = ibdev;
> > +	ret = hns_roce_alloc_pd(pd, NULL, NULL);
> > +	if (ret)
> >  		goto alloc_pd_failed;
> > -	}
>
> So this thing doesn't exist in restrack then? Weird.. It would be
> nicer if this driver would call the verbs instead..

It was influenced by mlx4, mlx4/mlx5 has the same bad pattern in many
places.

>
> > diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
> > index a1f097fa33f2..cf59c61cb953 100644
> > +++ b/include/rdma/ib_verbs.h
> > @@ -2380,10 +2380,10 @@ struct ib_device_ops {
> >  	int (*dealloc_ucontext)(struct ib_ucontext *context);
> >  	int (*mmap)(struct ib_ucontext *context, struct vm_area_struct *vma);
> >  	void (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
> > -	struct ib_pd *(*alloc_pd)(struct ib_device *device,
> > -				  struct ib_ucontext *context,
> > -				  struct ib_udata *udata);
> > -	int (*dealloc_pd)(struct ib_pd *pd);
> > +	int (*alloc_pd)(struct ib_pd *pd, struct ib_ucontext *context,
> > +			struct ib_udata *udata);
> > +	DECLARE_RDMA_DRIVER_SIZE(ib_pd);
>
> These should be a sorted block at the end, putting them here will
> waste memory due to alignment.

No problem.

>
> Jason

Attachment: signature.asc
Description: PGP signature


[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux