Implement a dmabuf importer for the EFA driver. As ODP is not supported, the dmabuf memory regions always pin the buffers to prevent the move_notify callback from being called. Signed-off-by: Gal Pressman <galpress@xxxxxxxxxx> --- drivers/infiniband/hw/efa/efa.h | 4 + drivers/infiniband/hw/efa/efa_main.c | 1 + drivers/infiniband/hw/efa/efa_verbs.c | 166 +++++++++++++++++++++----- 3 files changed, 141 insertions(+), 30 deletions(-) diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h index 2b8ca099b381..407d7c4baa16 100644 --- a/drivers/infiniband/hw/efa/efa.h +++ b/drivers/infiniband/hw/efa/efa.h @@ -141,6 +141,10 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); +struct ib_mr *efa_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start, + u64 length, u64 virt_addr, + int fd, int access_flags, + struct ib_udata *udata); int efa_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); int efa_get_port_immutable(struct ib_device *ibdev, u32 port_num, struct ib_port_immutable *immutable); diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c index 203e6ddcacbc..72cd7d952a07 100644 --- a/drivers/infiniband/hw/efa/efa_main.c +++ b/drivers/infiniband/hw/efa/efa_main.c @@ -267,6 +267,7 @@ static const struct ib_device_ops efa_dev_ops = { .query_port = efa_query_port, .query_qp = efa_query_qp, .reg_user_mr = efa_reg_mr, + .reg_user_mr_dmabuf = efa_reg_user_mr_dmabuf, INIT_RDMA_OBJ_SIZE(ib_ah, efa_ah, ibah), INIT_RDMA_OBJ_SIZE(ib_cq, efa_cq, ibcq), diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index be6d3ff0f1be..ca907853a84f 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -3,6 +3,8 @@ * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved. */ +#include <linux/dma-buf.h> +#include <linux/dma-resv.h> #include <linux/vmalloc.h> #include <linux/log2.h> @@ -1491,26 +1493,29 @@ static int efa_create_pbl(struct efa_dev *dev, return 0; } -struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, - u64 virt_addr, int access_flags, - struct ib_udata *udata) +static void efa_dmabuf_invalidate_cb(struct dma_buf_attachment *attach) +{ + WARN_ON_ONCE(1, + "Invalidate callback should not be called when memory is pinned\n"); +} + +static struct dma_buf_attach_ops efa_dmabuf_attach_ops = { + .allow_peer2peer = true, + .move_notify = efa_dmabuf_invalidate_cb, +}; + +static struct efa_mr *efa_alloc_mr(struct ib_pd *ibpd, int access_flags, + struct ib_udata *udata) { struct efa_dev *dev = to_edev(ibpd->device); - struct efa_com_reg_mr_params params = {}; - struct efa_com_reg_mr_result result = {}; - struct pbl_context pbl; int supp_access_flags; - unsigned int pg_sz; struct efa_mr *mr; - int inline_size; - int err; if (udata && udata->inlen && !ib_is_udata_cleared(udata, 0, sizeof(udata->inlen))) { ibdev_dbg(&dev->ibdev, "Incompatible ABI params, udata not cleared\n"); - err = -EINVAL; - goto err_out; + return ERR_PTR(-EINVAL); } supp_access_flags = @@ -1522,23 +1527,26 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, ibdev_dbg(&dev->ibdev, "Unsupported access flags[%#x], supported[%#x]\n", access_flags, supp_access_flags); - err = -EOPNOTSUPP; - goto err_out; + return ERR_PTR(-EOPNOTSUPP); } mr = kzalloc(sizeof(*mr), GFP_KERNEL); - if (!mr) { - err = -ENOMEM; - goto err_out; - } + if (!mr) + return ERR_PTR(-ENOMEM); - mr->umem = ib_umem_get(ibpd->device, start, length, access_flags); - if (IS_ERR(mr->umem)) { - err = PTR_ERR(mr->umem); - ibdev_dbg(&dev->ibdev, - "Failed to pin and map user space memory[%d]\n", err); - goto err_free; - } + return mr; +} + +static int efa_register_mr(struct ib_pd *ibpd, struct efa_mr *mr, u64 start, + u64 length, u64 virt_addr, int access_flags) +{ + struct efa_dev *dev = to_edev(ibpd->device); + struct efa_com_reg_mr_params params = {}; + struct efa_com_reg_mr_result result = {}; + struct pbl_context pbl; + unsigned int pg_sz; + int inline_size; + int err; params.pd = to_epd(ibpd)->pdn; params.iova = virt_addr; @@ -1549,10 +1557,9 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, dev->dev_attr.page_size_cap, virt_addr); if (!pg_sz) { - err = -EOPNOTSUPP; ibdev_dbg(&dev->ibdev, "Failed to find a suitable page size in page_size_cap %#llx\n", dev->dev_attr.page_size_cap); - goto err_unmap; + return -EOPNOTSUPP; } params.page_shift = order_base_2(pg_sz); @@ -1566,21 +1573,21 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, if (params.page_num <= inline_size) { err = efa_create_inline_pbl(dev, mr, ¶ms); if (err) - goto err_unmap; + return err; err = efa_com_register_mr(&dev->edev, ¶ms, &result); if (err) - goto err_unmap; + return err; } else { err = efa_create_pbl(dev, &pbl, mr, ¶ms); if (err) - goto err_unmap; + return err; err = efa_com_register_mr(&dev->edev, ¶ms, &result); pbl_destroy(dev, &pbl); if (err) - goto err_unmap; + return err; } mr->ibmr.lkey = result.l_key; @@ -1588,9 +1595,98 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, mr->ibmr.length = length; ibdev_dbg(&dev->ibdev, "Registered mr[%d]\n", mr->ibmr.lkey); + return 0; +} + +struct ib_mr *efa_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start, + u64 length, u64 virt_addr, + int fd, int access_flags, + struct ib_udata *udata) +{ + struct efa_dev *dev = to_edev(ibpd->device); + struct ib_umem_dmabuf *umem_dmabuf; + struct efa_mr *mr; + int err; + + mr = efa_alloc_mr(ibpd, access_flags, udata); + if (IS_ERR(mr)) { + err = PTR_ERR(mr); + goto err_out; + } + + umem_dmabuf = ib_umem_dmabuf_get(ibpd->device, start, length, fd, + access_flags, &efa_dmabuf_attach_ops); + if (IS_ERR(umem_dmabuf)) { + ibdev_dbg(&dev->ibdev, "Failed to get dmabuf[%d]\n", err); + err = PTR_ERR(umem_dmabuf); + goto err_free; + } + + dma_resv_lock(umem_dmabuf->attach->dmabuf->resv, NULL); + err = dma_buf_pin(umem_dmabuf->attach); + if (err) { + ibdev_dbg(&dev->ibdev, "Failed to pin dmabuf memory\n"); + goto err_release; + } + + err = ib_umem_dmabuf_map_pages(umem_dmabuf); + if (err) { + ibdev_dbg(&dev->ibdev, "Failed to map dmabuf pages\n"); + goto err_unpin; + } + dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv); + + mr->umem = &umem_dmabuf->umem; + err = efa_register_mr(ibpd, mr, start, length, virt_addr, access_flags); + if (err) + goto err_unmap; + return &mr->ibmr; err_unmap: + dma_resv_lock(umem_dmabuf->attach->dmabuf->resv, NULL); + ib_umem_dmabuf_unmap_pages(umem_dmabuf); +err_unpin: + dma_buf_unpin(umem_dmabuf->attach); +err_release: + dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv); + ib_umem_release(mr->umem); +err_free: + kfree(mr); +err_out: + atomic64_inc(&dev->stats.reg_mr_err); + return ERR_PTR(err); +} + +struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, + u64 virt_addr, int access_flags, + struct ib_udata *udata) +{ + struct efa_dev *dev = to_edev(ibpd->device); + struct efa_mr *mr; + int err; + + mr = efa_alloc_mr(ibpd, access_flags, udata); + if (IS_ERR(mr)) { + err = PTR_ERR(mr); + goto err_out; + } + + mr->umem = ib_umem_get(ibpd->device, start, length, access_flags); + if (IS_ERR(mr->umem)) { + err = PTR_ERR(mr->umem); + ibdev_dbg(&dev->ibdev, + "Failed to pin and map user space memory[%d]\n", err); + goto err_free; + } + + err = efa_register_mr(ibpd, mr, start, length, virt_addr, access_flags); + if (err) + goto err_release; + + return &mr->ibmr; + +err_release: ib_umem_release(mr->umem); err_free: kfree(mr); @@ -1603,6 +1699,7 @@ int efa_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) { struct efa_dev *dev = to_edev(ibmr->device); struct efa_com_dereg_mr_params params; + struct ib_umem_dmabuf *umem_dmabuf; struct efa_mr *mr = to_emr(ibmr); int err; @@ -1613,6 +1710,15 @@ int efa_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) if (err) return err; + if (mr->umem->is_dmabuf) { + umem_dmabuf = to_ib_umem_dmabuf(mr->umem); + + dma_resv_lock(umem_dmabuf->attach->dmabuf->resv, NULL); + ib_umem_dmabuf_unmap_pages(umem_dmabuf); + dma_buf_unpin(umem_dmabuf->attach); + dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv); + } + ib_umem_release(mr->umem); kfree(mr); -- 2.33.0