> Subject: Re: [Patch v7 12/12] RDMA/mana_ib: Add a driver for Microsoft > Azure Network Adapter > > On 2022/10/18 3:20, longli@xxxxxxxxxxxxxxxxx wrote: > > From: Long Li <longli@xxxxxxxxxxxxx> > > > > Add a RDMA VF driver for Microsoft Azure Network Adapter (MANA). > > > > Co-developed-by: Ajay Sharma <sharmaajay@xxxxxxxxxxxxx> > > Signed-off-by: Ajay Sharma <sharmaajay@xxxxxxxxxxxxx> > > Reviewed-by: Dexuan Cui <decui@xxxxxxxxxxxxx> > > Signed-off-by: Long Li <longli@xxxxxxxxxxxxx> > > --- > > Change log: > > v2: > > Changed coding sytles/formats > > Checked undersize for udata length > > Changed all logging to use ibdev_xxx() Avoided page array copy when > > doing MR Sorted driver ops Fixed warnings reported by kernel test > > robot <lkp@xxxxxxxxx> > > > > v3: > > More coding sytle/format changes > > > > v4: > > Process error on hardware vport configuration > > > > v5: > > Change licenses to GPL-2.0-only > > Fix error handling in mana_ib_gd_create_dma_region() > > > > v6: > > rebased to rdma-next > > removed redundant initialization to return value in mana_ib_probe() > > added missing tabs at the end of mana_ib_gd_create_dma_region() > > > > v7: > > move mana_gd_destroy_doorbell_page() and > > mana_gd_allocate_doorbell_page() from GDMA to this driver use > > ib_umem_find_best_pgsz() for finding page size for registering dma > > regions with hardware fix a bug that may double free mana_ind_table in > > mana_ib_create_qp_rss() add Ajay Sharma <sharmaajay@xxxxxxxxxxxxx> > to > > maintainer list add details to description in > > drivers/infiniband/hw/mana/Kconfig > > change multiple lines comments to use RDMA style from NETDEV style > > change mana_ib_dev_ops to static use module_auxiliary_driver() in > > place of module_init and module_exit move all user-triggerable error > > messages to debug messages check for ind_tbl_size overflow in > > mana_ib_create_qp_rss() > > > > MAINTAINERS | 9 + > > drivers/infiniband/Kconfig | 1 + > > drivers/infiniband/hw/Makefile | 1 + > > drivers/infiniband/hw/mana/Kconfig | 10 + > > drivers/infiniband/hw/mana/Makefile | 4 + > > drivers/infiniband/hw/mana/cq.c | 79 ++++ > > drivers/infiniband/hw/mana/device.c | 117 ++++++ > > drivers/infiniband/hw/mana/main.c | 508 > ++++++++++++++++++++++++ > > drivers/infiniband/hw/mana/mana_ib.h | 156 ++++++++ > > drivers/infiniband/hw/mana/mr.c | 200 ++++++++++ > > drivers/infiniband/hw/mana/qp.c | 505 +++++++++++++++++++++++ > > drivers/infiniband/hw/mana/wq.c | 115 ++++++ > > include/net/mana/mana.h | 3 + > > include/uapi/rdma/ib_user_ioctl_verbs.h | 1 + > > include/uapi/rdma/mana-abi.h | 66 +++ > > 15 files changed, 1775 insertions(+) > > create mode 100644 drivers/infiniband/hw/mana/Kconfig > > create mode 100644 drivers/infiniband/hw/mana/Makefile > > create mode 100644 drivers/infiniband/hw/mana/cq.c create mode > > 100644 drivers/infiniband/hw/mana/device.c > > create mode 100644 drivers/infiniband/hw/mana/main.c create mode > > 100644 drivers/infiniband/hw/mana/mana_ib.h > > create mode 100644 drivers/infiniband/hw/mana/mr.c create mode > > 100644 drivers/infiniband/hw/mana/qp.c create mode 100644 > > drivers/infiniband/hw/mana/wq.c create mode 100644 > > include/uapi/rdma/mana-abi.h > > > > [...] > > > + > > +int mana_ib_cfg_vport(struct mana_ib_dev *dev, u32 port, struct > mana_ib_pd *pd, > > + u32 doorbell_id) > > +{ > > + struct gdma_dev *mdev = dev->gdma_dev; > > + struct mana_port_context *mpc; > > + struct mana_context *mc; > > + struct net_device *ndev; > > + int err; > > + > > + mc = mdev->driver_data; > > + ndev = mc->ports[port]; > > + mpc = netdev_priv(ndev); > > + > > + mutex_lock(&pd->vport_mutex); > > + > > + pd->vport_use_count++; > > + if (pd->vport_use_count > 1) { > > + ibdev_dbg(&dev->ib_dev, > > + "Skip as this PD is already configured vport\n"); > > + mutex_unlock(&pd->vport_mutex); > > + return 0; > > + } > > + mutex_unlock(&pd->vport_mutex); > > + > > + err = mana_cfg_vport(mpc, pd->pdn, doorbell_id); > > + if (err) { > > + mutex_lock(&pd->vport_mutex); > > + pd->vport_use_count--; > > + mutex_unlock(&pd->vport_mutex); > > It seems there might be a race between the "pd->vport_use_count > 1" > checking above and the error handling here, it may cause other user using a > unconfigured vport if other user is checking the "pd->vport_use_count > 1)" > while mana_cfg_vport() fails before doing "pd->vport_use_count--". Thank you. You are correct about the possible race. I'm fixing it. > > > + > > + ibdev_dbg(&dev->ib_dev, "Failed to configure vPort %d\n", > err); > > + return err; > > + } > > + > > + pd->tx_shortform_allowed = mpc->tx_shortform_allowed; > > + pd->tx_vp_offset = mpc->tx_vp_offset; > > + > > + ibdev_dbg(&dev->ib_dev, "vport handle %llx pdid %x > doorbell_id %x\n", > > + mpc->port_handle, pd->pdn, doorbell_id); > > + > > + return 0; > > +} > > + > > [...] > > > + > > +static int mana_gd_allocate_doorbell_page(struct gdma_context *gc, > > + int *doorbell_page) > > +{ > > + struct gdma_allocate_resource_range_req req = {}; > > + struct gdma_allocate_resource_range_resp resp = {}; > > + int err; > > + > > + mana_gd_init_req_hdr(&req.hdr, > GDMA_ALLOCATE_RESOURCE_RANGE, > > + sizeof(req), sizeof(resp)); > > + > > + req.resource_type = GDMA_RESOURCE_DOORBELL_PAGE; > > + req.num_resources = 1; > > + req.alignment = 1; > > + > > + /* Have GDMA start searching from 0 */ > > + req.allocated_resources = 0; > > + > > + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), > &resp); > > + if (err || resp.hdr.status) { > > + dev_err(gc->dev, > > + "Failed to allocate doorbell page: ret %d, 0x%x\n", > > + err, resp.hdr.status); > > + return err ? err : -EPROTO; > > + } > > + > > + *doorbell_page = resp.allocated_resources; > > + > > + return 0; > > +} > > + > > +int mana_ib_alloc_ucontext(struct ib_ucontext *ibcontext, > > + struct ib_udata *udata) > > +{ > > + struct mana_ib_ucontext *ucontext = > > + container_of(ibcontext, struct mana_ib_ucontext, > ibucontext); > > + struct ib_device *ibdev = ibcontext->device; > > + struct mana_ib_dev *mdev; > > + struct gdma_context *gc; > > + struct gdma_dev *dev; > > + int doorbell_page; > > + int ret; > > + > > + mdev = container_of(ibdev, struct mana_ib_dev, ib_dev); > > + dev = mdev->gdma_dev; > > + gc = dev->gdma_context; > > + > > + /* Allocate a doorbell page index */ > > + ret = mana_gd_allocate_doorbell_page(gc, &doorbell_page); > > + if (ret) { > > + ibdev_dbg(ibdev, "Failed to allocate doorbell page %d\n", > ret); > > + return -ENOMEM; > > It does not make much sense to do "err ? err : -EPROTO" in > mana_gd_allocate_doorbell_page() if -ENOMEM is returned unconditionally > here. I will change it to return "ret", to be consistent with other usage on mana_gd_xxx functions. > > > + } > > + > > + ibdev_dbg(ibdev, "Doorbell page allocated %d\n", doorbell_page); > > + > > + ucontext->doorbell = doorbell_page; > > + > > + return 0; > > +} > > + > > [...] > > > diff --git a/drivers/infiniband/hw/mana/mana_ib.h > > b/drivers/infiniband/hw/mana/mana_ib.h > > new file mode 100644 > > index 000000000000..2225a6d6f8e1 > > --- /dev/null > > +++ b/drivers/infiniband/hw/mana/mana_ib.h > > @@ -0,0 +1,156 @@ > > +/* SPDX-License-Identifier: GPL-2.0-only */ > > +/* > > + * Copyright (c) 2022 Microsoft Corporation. All rights reserved. > > + */ > > + > > +#ifndef _MANA_IB_H_ > > +#define _MANA_IB_H_ > > + > > +#include <rdma/ib_verbs.h> > > +#include <rdma/ib_mad.h> > > +#include <rdma/ib_umem.h> > > +#include <rdma/mana-abi.h> > > +#include <rdma/uverbs_ioctl.h> > > + > > +#include <net/mana/mana.h> > > + > > +#define PAGE_SZ_BM \ > > + (SZ_4K | SZ_8K | SZ_16K | SZ_32K | SZ_64K | SZ_128K | SZ_256K | > \ > > + SZ_512K | SZ_1M | SZ_2M) > > + > > +/* MANA doesn't have any limit for MR size */ #define > > +MANA_IB_MAX_MR_SIZE ((u64)(~(0ULL))) > > Use U64_MAX? Good idea. Will change it. > > > + > > +struct mana_ib_dev { > > + struct ib_device ib_dev; > > + struct gdma_dev *gdma_dev; > > +}; > > + > > [...] > > > diff --git a/drivers/infiniband/hw/mana/mr.c > > b/drivers/infiniband/hw/mana/mr.c new file mode 100644 index > > 000000000000..09124dd1792d > > --- /dev/null > > +++ b/drivers/infiniband/hw/mana/mr.c > > @@ -0,0 +1,200 @@ > > +// SPDX-License-Identifier: GPL-2.0-only > > +/* > > + * Copyright (c) 2022, Microsoft Corporation. All rights reserved. > > + */ > > + > > +#include "mana_ib.h" > > + > > +#define VALID_MR_FLAGS \ > > + (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | > > +IB_ACCESS_REMOTE_READ) > > + > > +static enum gdma_mr_access_flags > > +mana_ib_verbs_to_gdma_access_flags(int access_flags) { > > + enum gdma_mr_access_flags flags = > GDMA_ACCESS_FLAG_LOCAL_READ; > > + > > + if (access_flags & IB_ACCESS_LOCAL_WRITE) > > + flags |= GDMA_ACCESS_FLAG_LOCAL_WRITE; > > + > > + if (access_flags & IB_ACCESS_REMOTE_WRITE) > > + flags |= GDMA_ACCESS_FLAG_REMOTE_WRITE; > > + > > + if (access_flags & IB_ACCESS_REMOTE_READ) > > + flags |= GDMA_ACCESS_FLAG_REMOTE_READ; > > + > > + return flags; > > +} > > + > > +static int mana_ib_gd_create_mr(struct mana_ib_dev *dev, struct > mana_ib_mr *mr, > > + struct gdma_create_mr_params *mr_params) > { > > + struct gdma_create_mr_response resp = {}; > > + struct gdma_create_mr_request req = {}; > > + struct gdma_dev *mdev = dev->gdma_dev; > > + struct gdma_context *gc; > > + int err; > > + > > + gc = mdev->gdma_context; > > + > > + mana_gd_init_req_hdr(&req.hdr, GDMA_CREATE_MR, sizeof(req), > > + sizeof(resp)); > > + req.pd_handle = mr_params->pd_handle; > > + req.mr_type = mr_params->mr_type; > > + > > + switch (mr_params->mr_type) { > > + case GDMA_MR_TYPE_GVA: > > + req.gva.dma_region_handle = mr_params- > >gva.dma_region_handle; > > + req.gva.virtual_address = mr_params->gva.virtual_address; > > + req.gva.access_flags = mr_params->gva.access_flags; > > + break; > > + > > + default: > > + ibdev_dbg(&dev->ib_dev, > > + "invalid param (GDMA_MR_TYPE) passed, > type %d\n", > > + req.mr_type); > > + err = -EINVAL; > > + goto error; > > + } > > + > > + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), > > +&resp); > > + > > + if (err || resp.hdr.status) { > > + ibdev_dbg(&dev->ib_dev, "Failed to create mr %d, %u", err, > > + resp.hdr.status); > > + if (!err) > > + err = -EPROTO; > > + > > + goto error; > > + } > > + > > + mr->ibmr.lkey = resp.lkey; > > + mr->ibmr.rkey = resp.rkey; > > + mr->mr_handle = resp.mr_handle; > > + > > + return 0; > > +error: > > + return err; > > There is no error handling here, maybe just return error directly instead of a > goto. Yes, will remove the goto. > > > +} > > + > > +static int mana_ib_gd_destroy_mr(struct mana_ib_dev *dev, > > +gdma_obj_handle_t mr_handle) { > > + struct gdma_destroy_mr_response resp = {}; > > + struct gdma_destroy_mr_request req = {}; > > + struct gdma_dev *mdev = dev->gdma_dev; > > + struct gdma_context *gc; > > + int err; > > + > > + gc = mdev->gdma_context; > > + > > + mana_gd_init_req_hdr(&req.hdr, GDMA_DESTROY_MR, sizeof(req), > > + sizeof(resp)); > > + > > + req.mr_handle = mr_handle; > > + > > + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), > &resp); > > + if (err || resp.hdr.status) { > > + dev_err(gc->dev, "Failed to destroy MR: %d, 0x%x\n", err, > > + resp.hdr.status); > > + if (!err) > > + err = -EPROTO; > > + return err; > > + } > > + > > + return 0; > > +} > > + > > [...] > > > + > > +static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd, > > + struct ib_qp_init_attr *attr, > > + struct ib_udata *udata) > > +{ > > + struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, > ibqp); > > + struct mana_ib_dev *mdev = > > + container_of(pd->device, struct mana_ib_dev, ib_dev); > > + struct ib_rwq_ind_table *ind_tbl = attr->rwq_ind_tbl; > > + struct mana_ib_create_qp_rss_resp resp = {}; > > + struct mana_ib_create_qp_rss ucmd = {}; > > + struct gdma_dev *gd = mdev->gdma_dev; > > + mana_handle_t *mana_ind_table; > > + struct mana_port_context *mpc; > > + struct mana_context *mc; > > + struct net_device *ndev; > > + struct mana_ib_cq *cq; > > + struct mana_ib_wq *wq; > > + unsigned int ind_tbl_size; > > + struct ib_cq *ibcq; > > + struct ib_wq *ibwq; > > + u32 port; > > + int ret; > > + int i; > > + > > + mc = gd->driver_data; > > + > > + if (!udata || udata->inlen < sizeof(ucmd)) > > + return -EINVAL; > > + > > + ret = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata- > >inlen)); > > + if (ret) { > > + ibdev_dbg(&mdev->ib_dev, > > + "Failed copy from udata for create rss-qp, err %d\n", > > + ret); > > + return -EFAULT; > > Why not just return 'ret' directly? Yes, it's better to just return "ret". Will fix this. Thank you.