Re: [PATCH rdma-next 6/7] RDMA/mlx5: Add support in MEMIC operations

On Thu, Mar 18, 2021 at 01:15:47PM +0200, Leon Romanovsky wrote:
> From: Maor Gottlieb <maorg@xxxxxxxxxx>
> 
> MEMIC buffer, in addition to regular read and write operations, can
> support atomic operations from the host.
> 
> Introduce and implement a new UAPI to allocate address space for MEMIC
> operations such as atomics. This includes:
> 
> 1. Expose a new IOCTL to request mapping of a MEMIC operation.
> 2. Hold the operation addresses in a list, so the same operation on the
>    same DM is allocated only once.
> 3. Manage a refcount on the mlx5_ib_dm object, so it is kept valid
>    until all addresses have been unmapped.
> 
> Signed-off-by: Maor Gottlieb <maorg@xxxxxxxxxx>
> Signed-off-by: Leon Romanovsky <leonro@xxxxxxxxxx>
> ---
>  drivers/infiniband/hw/mlx5/dm.c          | 196 +++++++++++++++++++++--
>  drivers/infiniband/hw/mlx5/dm.h          |   2 +
>  drivers/infiniband/hw/mlx5/main.c        |   7 +-
>  drivers/infiniband/hw/mlx5/mlx5_ib.h     |  16 +-
>  include/uapi/rdma/mlx5_user_ioctl_cmds.h |  11 ++
>  5 files changed, 214 insertions(+), 18 deletions(-)
> 
> --
> 2.30.2
> 
> diff --git a/drivers/infiniband/hw/mlx5/dm.c b/drivers/infiniband/hw/mlx5/dm.c
> index 97a925d43312..ee4ee197a626 100644
> --- a/drivers/infiniband/hw/mlx5/dm.c
> +++ b/drivers/infiniband/hw/mlx5/dm.c
> @@ -150,12 +150,14 @@ static int mlx5_cmd_alloc_memic_op(struct mlx5_dm *dm, phys_addr_t addr,
>  }
> 
>  static int add_dm_mmap_entry(struct ib_ucontext *context,
> -			     struct mlx5_ib_dm *mdm, u64 address)
> +			     struct mlx5_user_mmap_entry *mentry, u8 mmap_flag,
> +			     size_t size, u64 address)
>  {
> -	mdm->mentry.mmap_flag = MLX5_IB_MMAP_TYPE_MEMIC;
> -	mdm->mentry.address = address;
> +	mentry->mmap_flag = mmap_flag;
> +	mentry->address = address;
> +
>  	return rdma_user_mmap_entry_insert_range(
> -		context, &mdm->mentry.rdma_entry, mdm->size,
> +		context, &mentry->rdma_entry, size,
>  		MLX5_IB_MMAP_DEVICE_MEM << 16,
>  		(MLX5_IB_MMAP_DEVICE_MEM << 16) + (1UL << 16) - 1);
>  }
> @@ -183,6 +185,114 @@ static inline int check_dm_type_support(struct mlx5_ib_dev *dev, u32 type)
>  	return 0;
>  }
> 
> +void mlx5_ib_dm_memic_free(struct kref *kref)
> +{
> +	struct mlx5_ib_dm *dm =
> +		container_of(kref, struct mlx5_ib_dm, memic.ref);
> +	struct mlx5_ib_dev *dev = to_mdev(dm->ibdm.device);
> +
> +	mlx5_cmd_dealloc_memic(&dev->dm, dm->dev_addr, dm->size);
> +	kfree(dm);
> +}
> +
> +static int copy_op_to_user(struct mlx5_ib_dm_op_entry *op_entry,
> +			   struct uverbs_attr_bundle *attrs)
> +{
> +	u64 start_offset;
> +	u16 page_idx;
> +	int err;
> +
> +	page_idx = op_entry->mentry.rdma_entry.start_pgoff & 0xFFFF;
> +	start_offset = op_entry->op_addr & ~PAGE_MASK;
> +	err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DM_MAP_OP_ADDR_RESP_PAGE_INDEX,
> +			     &page_idx, sizeof(page_idx));
> +	if (err)
> +		return err;
> +
> +	return uverbs_copy_to(attrs,
> +			      MLX5_IB_ATTR_DM_MAP_OP_ADDR_RESP_START_OFFSET,
> +			      &start_offset, sizeof(start_offset));
> +}
> +
> +static int map_existing_op(struct mlx5_ib_dm *dm, u8 op,
> +			   struct uverbs_attr_bundle *attrs)
> +{
> +	struct mlx5_ib_dm_op_entry *op_entry;
> +
> +	op_entry = xa_load(&dm->memic.ops, op);
> +	if (!op_entry)
> +		return -ENOENT;
> +
> +	return copy_op_to_user(op_entry, attrs);
> +}
> +
> +static int UVERBS_HANDLER(MLX5_IB_METHOD_DM_MAP_OP_ADDR)(
> +	struct uverbs_attr_bundle *attrs)
> +{
> +	struct ib_uobject *uobj = uverbs_attr_get_uobject(
> +		attrs, MLX5_IB_ATTR_DM_MAP_OP_ADDR_REQ_HANDLE);
> +	struct mlx5_ib_dev *dev = to_mdev(uobj->context->device);
> +	struct ib_dm *ibdm = uobj->object;
> +	struct mlx5_ib_dm *dm = to_mdm(ibdm);
> +	struct mlx5_ib_dm_op_entry *op_entry;
> +	int err;
> +	u8 op;
> +
> +	err = uverbs_copy_from(&op, attrs, MLX5_IB_ATTR_DM_MAP_OP_ADDR_REQ_OP);
> +	if (err)
> +		return err;
> +
> +	if (!(MLX5_CAP_DEV_MEM(dev->mdev, memic_operations) & BIT(op)))
> +		return -EOPNOTSUPP;
> +
> +	mutex_lock(&dm->memic.ops_xa_lock);
> +	err = map_existing_op(dm, op, attrs);
> +	if (!err || err != -ENOENT)
> +		goto err_unlock;
> +
> +	op_entry = kzalloc(sizeof(*op_entry), GFP_KERNEL);
> +	if (!op_entry)
> +		goto err_unlock;
> +
> +	err = mlx5_cmd_alloc_memic_op(&dev->dm, dm->dev_addr, op,
> +				      &op_entry->op_addr);
> +	if (err) {
> +		kfree(op_entry);
> +		goto err_unlock;
> +	}
> +	op_entry->op = op;
> +	op_entry->dm = dm;
> +
> +	err = add_dm_mmap_entry(uobj->context, &op_entry->mentry,
> +				MLX5_IB_MMAP_TYPE_MEMIC_OP, dm->size,
> +				op_entry->op_addr & PAGE_MASK);
> +	if (err) {
> +		mlx5_cmd_dealloc_memic_op(&dev->dm, dm->dev_addr, op);
> +		kfree(op_entry);
> +		goto err_unlock;
> +	}
> +	/* From this point, entry will be freed by mmap_free */
> +	kref_get(&dm->memic.ref);
> +
> +	err = copy_op_to_user(op_entry, attrs);
> +	if (err)
> +		goto err_remove;
> +
> +	err = xa_insert(&dm->memic.ops, op, op_entry, GFP_KERNEL);
> +	if (err)
> +		goto err_remove;
> +	mutex_unlock(&dm->memic.ops_xa_lock);
> +
> +	return 0;
> +
> +err_remove:
> +	rdma_user_mmap_entry_remove(&op_entry->mentry.rdma_entry);
> +err_unlock:
> +	mutex_unlock(&dm->memic.ops_xa_lock);
> +
> +	return err;
> +}
> +
>  static int handle_alloc_dm_memic(struct ib_ucontext *ctx, struct mlx5_ib_dm *dm,
>  				 struct ib_dm_alloc_attr *attr,
>  				 struct uverbs_attr_bundle *attrs)
> @@ -193,6 +303,9 @@ static int handle_alloc_dm_memic(struct ib_ucontext *ctx, struct mlx5_ib_dm *dm,
>  	int err;
>  	u64 address;
> 
> +	kref_init(&dm->memic.ref);
> +	xa_init(&dm->memic.ops);
> +	mutex_init(&dm->memic.ops_xa_lock);
>  	dm->size = roundup(attr->length, MLX5_MEMIC_BASE_SIZE);
> 
>  	err = mlx5_cmd_alloc_memic(dm_db, &dm->dev_addr,
> @@ -203,18 +316,17 @@ static int handle_alloc_dm_memic(struct ib_ucontext *ctx, struct mlx5_ib_dm *dm,
>  	}
> 
>  	address = dm->dev_addr & PAGE_MASK;
> -	err = add_dm_mmap_entry(ctx, dm, address);
> +	err = add_dm_mmap_entry(ctx, &dm->memic.mentry, MLX5_IB_MMAP_TYPE_MEMIC,
> +				dm->size, address);
>  	if (err) {
>  		mlx5_cmd_dealloc_memic(dm_db, dm->dev_addr, dm->size);
>  		kfree(dm);
>  		return err;
>  	}
> 
> -	page_idx = dm->mentry.rdma_entry.start_pgoff & 0xFFFF;
> -	err = uverbs_copy_to(attrs,
> -			     MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
> -			     &page_idx,
> -			     sizeof(page_idx));
> +	page_idx = dm->memic.mentry.rdma_entry.start_pgoff & 0xFFFF;
> +	err = uverbs_copy_to(attrs, MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
> +			     &page_idx, sizeof(page_idx));
>  	if (err)
>  		goto err_copy;
> 
> @@ -228,7 +340,7 @@ static int handle_alloc_dm_memic(struct ib_ucontext *ctx, struct mlx5_ib_dm *dm,
>  	return 0;
> 
>  err_copy:
> -	rdma_user_mmap_entry_remove(&dm->mentry.rdma_entry);
> +	rdma_user_mmap_entry_remove(&dm->memic.mentry.rdma_entry);
> 
>  	return err;
>  }
> @@ -292,6 +404,7 @@ struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev,
>  		return ERR_PTR(-ENOMEM);
> 
>  	dm->type = type;
> +	dm->ibdm.device = ibdev;
> 
>  	switch (type) {
>  	case MLX5_IB_UAPI_DM_TYPE_MEMIC:
> @@ -323,6 +436,19 @@ struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev,
>  	return ERR_PTR(err);
>  }
> 
> +static void dm_memic_remove_ops(struct mlx5_ib_dm *dm)
> +{
> +	struct mlx5_ib_dm_op_entry *entry;
> +	unsigned long idx;
> +
> +	mutex_lock(&dm->memic.ops_xa_lock);
> +	xa_for_each(&dm->memic.ops, idx, entry) {
> +		xa_erase(&dm->memic.ops, idx);
> +		rdma_user_mmap_entry_remove(&entry->mentry.rdma_entry);
> +	}
> +	mutex_unlock(&dm->memic.ops_xa_lock);
> +}
> +
>  int mlx5_ib_dealloc_dm(struct ib_dm *ibdm, struct uverbs_attr_bundle *attrs)
>  {
>  	struct mlx5_ib_ucontext *ctx = rdma_udata_to_drv_context(
> @@ -333,7 +459,8 @@ int mlx5_ib_dealloc_dm(struct ib_dm *ibdm, struct uverbs_attr_bundle *attrs)
> 
>  	switch (dm->type) {
>  	case MLX5_IB_UAPI_DM_TYPE_MEMIC:
> -		rdma_user_mmap_entry_remove(&dm->mentry.rdma_entry);
> +		dm_memic_remove_ops(dm);
> +		rdma_user_mmap_entry_remove(&dm->memic.mentry.rdma_entry);
>  		return 0;
>  	case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
>  		ret = mlx5_dm_sw_icm_dealloc(dev, MLX5_SW_ICM_TYPE_STEERING,
> @@ -359,6 +486,31 @@ int mlx5_ib_dealloc_dm(struct ib_dm *ibdm, struct uverbs_attr_bundle *attrs)
>  	return 0;
>  }
> 
> +void mlx5_ib_dm_mmap_free(struct mlx5_ib_dev *dev,
> +			  struct mlx5_user_mmap_entry *mentry)
> +{
> +	struct mlx5_ib_dm_op_entry *op_entry;
> +	struct mlx5_ib_dm *mdm;
> +
> +	switch (mentry->mmap_flag) {
> +	case MLX5_IB_MMAP_TYPE_MEMIC:
> +		mdm = container_of(mentry, struct mlx5_ib_dm, memic.mentry);
> +		kref_put(&mdm->memic.ref, mlx5_ib_dm_memic_free);
> +		break;
> +	case MLX5_IB_MMAP_TYPE_MEMIC_OP:
> +		op_entry = container_of(mentry, struct mlx5_ib_dm_op_entry,
> +					mentry);
> +		mdm = op_entry->dm;
> +		mlx5_cmd_dealloc_memic_op(&dev->dm, mdm->dev_addr,
> +					  op_entry->op);
> +		kfree(op_entry);
> +		kref_put(&mdm->memic.ref, mlx5_ib_dm_memic_free);
> +		break;
> +	default:
> +		WARN_ON(true);
> +	}
> +}
> +
>  ADD_UVERBS_ATTRIBUTES_SIMPLE(
>  	mlx5_ib_dm, UVERBS_OBJECT_DM, UVERBS_METHOD_DM_ALLOC,
>  	UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
> @@ -368,8 +520,28 @@ ADD_UVERBS_ATTRIBUTES_SIMPLE(
>  	UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_ALLOC_DM_REQ_TYPE,
>  			     enum mlx5_ib_uapi_dm_type, UA_OPTIONAL));
> 
> +DECLARE_UVERBS_NAMED_METHOD(
> +	MLX5_IB_METHOD_DM_MAP_OP_ADDR,
> +	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DM_MAP_OP_ADDR_REQ_HANDLE,
> +			UVERBS_OBJECT_DM,
> +			UVERBS_ACCESS_READ,
> +			UA_MANDATORY),
> +	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DM_MAP_OP_ADDR_REQ_OP,
> +			   UVERBS_ATTR_TYPE(u8),
> +			   UA_MANDATORY),
> +	UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DM_MAP_OP_ADDR_RESP_START_OFFSET,
> +			    UVERBS_ATTR_TYPE(u64),
> +			    UA_MANDATORY),
> +	UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DM_MAP_OP_ADDR_RESP_PAGE_INDEX,
> +			    UVERBS_ATTR_TYPE(u16),
> +			    UA_OPTIONAL));
> +
> +DECLARE_UVERBS_GLOBAL_METHODS(UVERBS_OBJECT_DM,
> +			      &UVERBS_METHOD(MLX5_IB_METHOD_DM_MAP_OP_ADDR));
> +
>  const struct uapi_definition mlx5_ib_dm_defs[] = {
>  	UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DM, &mlx5_ib_dm),
> +	UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_DM),
>  	{},
>  };
> 
> diff --git a/drivers/infiniband/hw/mlx5/dm.h b/drivers/infiniband/hw/mlx5/dm.h
> index adb39d3d8131..56cf1ba9c985 100644
> --- a/drivers/infiniband/hw/mlx5/dm.h
> +++ b/drivers/infiniband/hw/mlx5/dm.h
> @@ -8,6 +8,8 @@
> 
>  #include "mlx5_ib.h"
> 
> +void mlx5_ib_dm_mmap_free(struct mlx5_ib_dev *dev,
> +			  struct mlx5_user_mmap_entry *mentry);
>  void mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr,
>  			    u64 length);
>  void mlx5_cmd_dealloc_memic_op(struct mlx5_dm *dm, phys_addr_t addr,
> diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
> index 49c8c60d9520..6908db28b796 100644
> --- a/drivers/infiniband/hw/mlx5/main.c
> +++ b/drivers/infiniband/hw/mlx5/main.c
> @@ -2090,14 +2090,11 @@ static void mlx5_ib_mmap_free(struct rdma_user_mmap_entry *entry)
>  	struct mlx5_user_mmap_entry *mentry = to_mmmap(entry);
>  	struct mlx5_ib_dev *dev = to_mdev(entry->ucontext->device);
>  	struct mlx5_var_table *var_table = &dev->var_table;
> -	struct mlx5_ib_dm *mdm;
> 
>  	switch (mentry->mmap_flag) {
>  	case MLX5_IB_MMAP_TYPE_MEMIC:
> -		mdm = container_of(mentry, struct mlx5_ib_dm, mentry);
> -		mlx5_cmd_dealloc_memic(&dev->dm, mdm->dev_addr,
> -				       mdm->size);
> -		kfree(mdm);
> +	case MLX5_IB_MMAP_TYPE_MEMIC_OP:
> +		mlx5_ib_dm_mmap_free(dev, mentry);
>  		break;
>  	case MLX5_IB_MMAP_TYPE_VAR:
>  		mutex_lock(&var_table->bitmap_lock);
> diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
> index ae971de6e934..b714131f87b7 100644
> --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
> +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
> @@ -166,6 +166,7 @@ enum mlx5_ib_mmap_type {
>  	MLX5_IB_MMAP_TYPE_VAR = 2,
>  	MLX5_IB_MMAP_TYPE_UAR_WC = 3,
>  	MLX5_IB_MMAP_TYPE_UAR_NC = 4,
> +	MLX5_IB_MMAP_TYPE_MEMIC_OP = 5,
>  };
> 
>  struct mlx5_bfreg_info {
> @@ -618,18 +619,30 @@ struct mlx5_user_mmap_entry {
>  	u32 page_idx;
>  };
> 
> +struct mlx5_ib_dm_op_entry {
> +	struct mlx5_user_mmap_entry	mentry;
> +	phys_addr_t			op_addr;
> +	struct mlx5_ib_dm		*dm;
> +	u8				op;
> +};
> +
>  struct mlx5_ib_dm {
>  	struct ib_dm		ibdm;
>  	phys_addr_t		dev_addr;
>  	u32			type;
>  	size_t			size;
>  	union {
> +		struct {
> +				struct mlx5_user_mmap_entry mentry;
> +				struct xarray		ops;
> +				struct mutex		ops_xa_lock;
> +				struct kref		ref;
> +		} memic;
>  		struct {
>  			u32	obj_id;
>  		} icm_dm;

This union is making it much too difficult to read and understand now.
An optional kref inside a structure is a step too far.

Please split it into more types and have proper type safety throughout.
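
Something along these lines, just to illustrate what I mean (the struct
and helper names below are made up, not taken from the patch):

/* common base shared by all DM types */
struct mlx5_ib_dm {
	struct ib_dm		ibdm;
	phys_addr_t		dev_addr;
	u32			type;
	size_t			size;
};

struct mlx5_ib_dm_memic {
	struct mlx5_ib_dm		base;
	struct mlx5_user_mmap_entry	mentry;
	struct xarray			ops;
	struct mutex			ops_xa_lock;
	/* the kref only exists in the one type that actually needs it */
	struct kref			ref;
};

struct mlx5_ib_dm_icm {
	struct mlx5_ib_dm	base;
	u32			obj_id;
};

static inline struct mlx5_ib_dm_memic *to_memic(struct ib_dm *ibdm)
{
	return container_of(ibdm, struct mlx5_ib_dm_memic, base.ibdm);
}

Then every place that currently does to_mdm() and pokes into the union
converts to the right derived type instead.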

It looks mostly fine otherwise. The error flows are a bit hard to read,
though; when a new type is added this should also get re-organized so
we don't do stuff like:

err_free:
	/* In MEMIC error flow, dm will be freed internally */
	if (type != MLX5_IB_UAPI_DM_TYPE_MEMIC)
		kfree(dm);

I'd inline the checks from check_dm_type_support() into their
respective allocation functions too.
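
Roughly, I'd expect the alloc path to end up as a plain dispatcher where
each helper does its own capability check, allocation and cleanup. A
rough sketch of the shape (not tested, helper signatures adjusted
accordingly):

struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev,
			       struct ib_ucontext *context,
			       struct ib_dm_alloc_attr *attr,
			       struct uverbs_attr_bundle *attrs)
{
	enum mlx5_ib_uapi_dm_type type;
	int err;

	err = uverbs_get_const_default(&type, attrs,
				       MLX5_IB_ATTR_ALLOC_DM_REQ_TYPE,
				       MLX5_IB_UAPI_DM_TYPE_MEMIC);
	if (err)
		return ERR_PTR(err);

	switch (type) {
	case MLX5_IB_UAPI_DM_TYPE_MEMIC:
		/* Checks MLX5_CAP_DEV_MEM(.., memic) itself, frees its own
		 * allocation on error and returns ERR_PTR() or the ib_dm.
		 */
		return handle_alloc_dm_memic(context, attr, attrs);
	case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
		/* same idea for the ICM types */
		return handle_alloc_dm_sw_icm(context, attr, attrs, type);
	default:
		return ERR_PTR(-EOPNOTSUPP);
	}
}

Then there is no shared err_free label that has to know which types free
themselves.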

Jason


