> Subject: [PATCHv2 1/1] RDMA/irdma: Add support for dmabuf pin memory
> regions
>
> From: Zhu Yanjun <yanjun.zhu@xxxxxxxxx>
>
> This is a follow-up to the EFA dmabuf[1]. The irdma driver currently does
> not support on-demand paging (ODP), so it uses habanalabs as the dmabuf
> exporter and irdma as the importer to allow peer2peer access through
> libibverbs.
>
> In this commit, the function ib_umem_dmabuf_get_pinned() is used. This
> function was introduced in the EFA dmabuf series[1] and allows the driver
> to get a dmabuf umem which is pinned and does not require a move_notify
> callback implementation. The returned umem is pinned and DMA mapped like
> standard cpu umems, and is released through ib_umem_release().
>
> [1] https://lore.kernel.org/lkml/20211007114018.GD2688930@xxxxxxxx/t/
>
> Signed-off-by: Zhu Yanjun <yanjun.zhu@xxxxxxxxx>

Is there a corresponding user-space patch?

> ---
> V1->V2: Fix the build warning by adding a static
> ---
>  drivers/infiniband/hw/irdma/verbs.c | 158 ++++++++++++++++++++++++++++
>  1 file changed, 158 insertions(+)
>
> diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
> index f6973ea55eda..1572baa93856 100644
> --- a/drivers/infiniband/hw/irdma/verbs.c
> +++ b/drivers/infiniband/hw/irdma/verbs.c
> @@ -2912,6 +2912,163 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
>          return ERR_PTR(err);
>  }
>
> +static struct ib_mr *irdma_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start,
> +                                              u64 len, u64 virt,
> +                                              int fd, int access,
> +                                              struct ib_udata *udata)
> +{
> +        struct irdma_device *iwdev = to_iwdev(pd->device);
> +        struct irdma_ucontext *ucontext;
> +        struct irdma_pble_alloc *palloc;
> +        struct irdma_pbl *iwpbl;
> +        struct irdma_mr *iwmr;
> +        struct irdma_mem_reg_req req;
> +        u32 total, stag = 0;
> +        u8 shadow_pgcnt = 1;
> +        bool use_pbles = false;
> +        unsigned long flags;
> +        int err = -EINVAL;
> +        struct ib_umem_dmabuf *umem_dmabuf;
> +
> +        if (len > iwdev->rf->sc_dev.hw_attrs.max_mr_size)
> +                return ERR_PTR(-EINVAL);
> +
> +        if (udata->inlen < IRDMA_MEM_REG_MIN_REQ_LEN)
> +                return ERR_PTR(-EINVAL);
> +
> +        umem_dmabuf = ib_umem_dmabuf_get_pinned(pd->device, start, len, fd,
> +                                                access);
> +        if (IS_ERR(umem_dmabuf)) {
> +                err = PTR_ERR(umem_dmabuf);
> +                ibdev_dbg(&iwdev->ibdev, "Failed to get dmabuf umem[%d]\n",
> +                          err);
> +                return ERR_PTR(err);
> +        }
> +
> +        if (ib_copy_from_udata(&req, udata, min(sizeof(req), udata->inlen))) {
> +                ib_umem_release(&umem_dmabuf->umem);
> +                return ERR_PTR(-EFAULT);
> +        }
> +
> +        iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL);
> +        if (!iwmr) {
> +                ib_umem_release(&umem_dmabuf->umem);
> +                return ERR_PTR(-ENOMEM);
> +        }
> +
> +        iwpbl = &iwmr->iwpbl;
> +        iwpbl->iwmr = iwmr;
> +        iwmr->region = &umem_dmabuf->umem;
> +        iwmr->ibmr.pd = pd;
> +        iwmr->ibmr.device = pd->device;
> +        iwmr->ibmr.iova = virt;
> +        iwmr->page_size = PAGE_SIZE;
> +
> +        if (req.reg_type == IRDMA_MEMREG_TYPE_MEM) {
> +                iwmr->page_size = ib_umem_find_best_pgsz(iwmr->region,
> +                                                         iwdev->rf->sc_dev.hw_attrs.page_size_cap,
> +                                                         virt);
> +                if (unlikely(!iwmr->page_size)) {
> +                        kfree(iwmr);
> +                        ib_umem_release(iwmr->region);
> +                        return ERR_PTR(-EOPNOTSUPP);
> +                }
> +        }
> +        iwmr->len = iwmr->region->length;
> +        iwpbl->user_base = virt;
> +        palloc = &iwpbl->pble_alloc;
> +        iwmr->type = req.reg_type;
> +        iwmr->page_cnt = ib_umem_num_dma_blocks(iwmr->region,
> +                                                iwmr->page_size);
> +
> +        switch (req.reg_type) {
> +        case IRDMA_MEMREG_TYPE_QP:
> +                total = req.sq_pages + req.rq_pages + shadow_pgcnt;
> +                if (total > iwmr->page_cnt) {
> +                        err = -EINVAL;
> +                        goto error;
> +                }
> +                total = req.sq_pages + req.rq_pages;
> +                use_pbles = (total > 2);
> +                err = irdma_handle_q_mem(iwdev, &req, iwpbl, use_pbles);
> +                if (err)
> +                        goto error;
> +
> +                ucontext = rdma_udata_to_drv_context(udata, struct irdma_ucontext,
> +                                                     ibucontext);
> +                spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags);
> +                list_add_tail(&iwpbl->list, &ucontext->qp_reg_mem_list);
> +                iwpbl->on_list = true;
> +                spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
> +                break;
> +        case IRDMA_MEMREG_TYPE_CQ:
> +                if (iwdev->rf->sc_dev.hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_CQ_RESIZE)
> +                        shadow_pgcnt = 0;
> +                total = req.cq_pages + shadow_pgcnt;
> +                if (total > iwmr->page_cnt) {
> +                        err = -EINVAL;
> +                        goto error;
> +                }
> +
> +                use_pbles = (req.cq_pages > 1);
> +                err = irdma_handle_q_mem(iwdev, &req, iwpbl, use_pbles);
> +                if (err)
> +                        goto error;
> +
> +                ucontext = rdma_udata_to_drv_context(udata, struct irdma_ucontext,
> +                                                     ibucontext);
> +                spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
> +                list_add_tail(&iwpbl->list, &ucontext->cq_reg_mem_list);
> +                iwpbl->on_list = true;
> +                spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
> +                break;

I don't think we want to do this for user QP and CQ pinned memory; it will
just be dead code. The irdma provider implementation of ibv_reg_dmabuf_mr
should just default to the IRDMA_MEMREG_TYPE_MEM type, similar to how
irdma_ureg_mr is implemented:

https://github.com/linux-rdma/rdma-core/blob/master/providers/irdma/uverbs.c#L128

It should simplify this function a lot (a rough sketch of what the
simplified flow could look like is at the end of this mail).

> +        case IRDMA_MEMREG_TYPE_MEM:
> +                use_pbles = (iwmr->page_cnt != 1);
> +
> +                err = irdma_setup_pbles(iwdev->rf, iwmr, use_pbles, false);
> +                if (err)
> +                        goto error;
> +
> +                if (use_pbles) {
> +                        err = irdma_check_mr_contiguous(palloc,
> +                                                        iwmr->page_size);
> +                        if (err) {
> +                                irdma_free_pble(iwdev->rf->pble_rsrc, palloc);
> +                                iwpbl->pbl_allocated = false;
> +                        }
> +                }
> +
> +                stag = irdma_create_stag(iwdev);
> +                if (!stag) {
> +                        err = -ENOMEM;
> +                        goto error;
> +                }
> +
> +                iwmr->stag = stag;
> +                iwmr->ibmr.rkey = stag;
> +                iwmr->ibmr.lkey = stag;
> +                err = irdma_hwreg_mr(iwdev, iwmr, access);
> +                if (err) {
> +                        irdma_free_stag(iwdev, stag);
> +                        goto error;
> +                }
> +
> +                break;
> +        default:
> +                goto error;
> +        }
> +
> +        iwmr->type = req.reg_type;
> +
> +        return &iwmr->ibmr;
> +
> +error:
> +        if (palloc->level != PBLE_LEVEL_0 && iwpbl->pbl_allocated)
> +                irdma_free_pble(iwdev->rf->pble_rsrc, palloc);
> +        ib_umem_release(iwmr->region);
> +        kfree(iwmr);

Ideally we want to unwind in the reverse order of allocation.

> +
> +        return ERR_PTR(err);
> +}
> +
>  /**
>   * irdma_reg_phys_mr - register kernel physical memory
>   * @pd: ibpd pointer
> @@ -4418,6 +4575,7 @@ static const struct ib_device_ops irdma_dev_ops = {
>          .query_port = irdma_query_port,
>          .query_qp = irdma_query_qp,
>          .reg_user_mr = irdma_reg_user_mr,
> +        .reg_user_mr_dmabuf = irdma_reg_user_mr_dmabuf,
>          .req_notify_cq = irdma_req_notify_cq,
>          .resize_cq = irdma_resize_cq,
>          INIT_RDMA_OBJ_SIZE(ib_pd, irdma_pd, ibpd),
> --
> 2.27.0
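For illustration only, here is a rough, untested sketch of how the MEM-only
registration path with a reverse-order unwind could look. It reuses the
helpers already called in the patch (ib_umem_dmabuf_get_pinned(),
irdma_setup_pbles(), irdma_check_mr_contiguous(), irdma_create_stag(),
irdma_hwreg_mr()), assumes the registration type is always
IRDMA_MEMREG_TYPE_MEM so no driver-specific udata request needs to be
parsed, and has not been compiled or tested against the driver:

static struct ib_mr *irdma_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start,
					      u64 len, u64 virt, int fd,
					      int access,
					      struct ib_udata *udata)
{
	struct irdma_device *iwdev = to_iwdev(pd->device);
	struct ib_umem_dmabuf *umem_dmabuf;
	struct irdma_pble_alloc *palloc;
	struct irdma_pbl *iwpbl;
	struct irdma_mr *iwmr;
	bool use_pbles;
	u32 stag;
	int err;

	if (len > iwdev->rf->sc_dev.hw_attrs.max_mr_size)
		return ERR_PTR(-EINVAL);

	/* pinned dmabuf umem, no move_notify implementation required */
	umem_dmabuf = ib_umem_dmabuf_get_pinned(pd->device, start, len, fd,
						access);
	if (IS_ERR(umem_dmabuf))
		return ERR_CAST(umem_dmabuf);

	iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL);
	if (!iwmr) {
		err = -ENOMEM;
		goto free_umem;
	}

	iwpbl = &iwmr->iwpbl;
	iwpbl->iwmr = iwmr;
	iwmr->region = &umem_dmabuf->umem;
	iwmr->ibmr.pd = pd;
	iwmr->ibmr.device = pd->device;
	iwmr->ibmr.iova = virt;
	iwmr->type = IRDMA_MEMREG_TYPE_MEM;	/* always MEM for dmabuf */

	iwmr->page_size = ib_umem_find_best_pgsz(iwmr->region,
						 iwdev->rf->sc_dev.hw_attrs.page_size_cap,
						 virt);
	if (!iwmr->page_size) {
		err = -EOPNOTSUPP;
		goto free_iwmr;
	}

	iwmr->len = iwmr->region->length;
	iwpbl->user_base = virt;
	palloc = &iwpbl->pble_alloc;
	iwmr->page_cnt = ib_umem_num_dma_blocks(iwmr->region, iwmr->page_size);

	use_pbles = (iwmr->page_cnt != 1);
	err = irdma_setup_pbles(iwdev->rf, iwmr, use_pbles, false);
	if (err)
		goto free_iwmr;

	if (use_pbles) {
		err = irdma_check_mr_contiguous(palloc, iwmr->page_size);
		if (err) {
			/* fall back to describing the region without PBLEs */
			irdma_free_pble(iwdev->rf->pble_rsrc, palloc);
			iwpbl->pbl_allocated = false;
		}
	}

	stag = irdma_create_stag(iwdev);
	if (!stag) {
		err = -ENOMEM;
		goto free_pble;
	}

	iwmr->stag = stag;
	iwmr->ibmr.rkey = stag;
	iwmr->ibmr.lkey = stag;

	err = irdma_hwreg_mr(iwdev, iwmr, access);
	if (err)
		goto free_stag;

	return &iwmr->ibmr;

	/* unwind in the reverse order of allocation */
free_stag:
	irdma_free_stag(iwdev, stag);
free_pble:
	if (iwpbl->pbl_allocated)
		irdma_free_pble(iwdev->rf->pble_rsrc, palloc);
free_iwmr:
	kfree(iwmr);
free_umem:
	ib_umem_release(&umem_dmabuf->umem);

	return ERR_PTR(err);
}

Field names and exact helper signatures are taken from the quoted patch and
should of course be double-checked against the current driver.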