Re: [PATCH 06/20] RDMA/mlx5: Implement mlx5_ib_map_mr_sg_pi and mlx5_ib_alloc_mr_integrity

On Thu, May 30, 2019 at 04:25:17PM +0300, Max Gurtovoy wrote:
> mlx5_ib_map_mr_sg_pi() maps the DMA-mapped PI and data SG lists to the
> mlx5 memory region prior to the registration operation. In the new
> API, the mlx5 driver allocates an internal memory region for the UMR
> operation to register both the PI and data SG lists. The internal MR
> uses KLM mode in order to map 2 (possibly non-contiguous and
> non-aligned) SG lists using 1 memory key. In the new API, each ULP
> uses 1 memory region for the signature operation (instead of 3 in the
> old API). This memory region has a key that is exposed to the remote
> server to perform the RDMA operation. The internal memory key that
> maps the SG lists stays private.
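
For readers following the series: a minimal sketch of how a ULP would
drive this (assuming the ib_alloc_mr_integrity()/ib_map_mr_sg_pi()
entry points added earlier in the series; the variable names here are
mine):

	struct ib_mr *mr;
	int n;

	/* one MR for both data and protection information */
	mr = ib_alloc_mr_integrity(pd, data_sg_cnt, prot_sg_cnt);
	if (IS_ERR(mr))
		return PTR_ERR(mr);

	n = ib_map_mr_sg_pi(mr, data_sg, data_sg_cnt, NULL,
			    prot_sg, prot_sg_cnt, NULL);
	if (unlikely(n != data_sg_cnt + prot_sg_cnt))
		return -EINVAL;		/* short mapping */

	/* mr->rkey is the single key exposed to the remote side;
	 * the internal pi_mr key below never leaves the driver. */
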
> 
> Signed-off-by: Max Gurtovoy <maxg@xxxxxxxxxxxx>
> Signed-off-by: Israel Rukshin <israelr@xxxxxxxxxxxx>
> ---
>  drivers/infiniband/hw/mlx5/main.c    |   2 +
>  drivers/infiniband/hw/mlx5/mlx5_ib.h |  11 +++
>  drivers/infiniband/hw/mlx5/mr.c      | 184 ++++++++++++++++++++++++++++++++---
>  3 files changed, 186 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
> index abac70ad5c7c..b6588cdef1cf 100644
> --- a/drivers/infiniband/hw/mlx5/main.c
> +++ b/drivers/infiniband/hw/mlx5/main.c
> @@ -6126,6 +6126,7 @@ static void mlx5_ib_stage_flow_db_cleanup(struct mlx5_ib_dev *dev)
>  static const struct ib_device_ops mlx5_ib_dev_ops = {
>  	.add_gid = mlx5_ib_add_gid,
>  	.alloc_mr = mlx5_ib_alloc_mr,
> +	.alloc_mr_integrity = mlx5_ib_alloc_mr_integrity,
>  	.alloc_pd = mlx5_ib_alloc_pd,
>  	.alloc_ucontext = mlx5_ib_alloc_ucontext,
>  	.attach_mcast = mlx5_ib_mcg_attach,
> @@ -6155,6 +6156,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
>  	.get_dma_mr = mlx5_ib_get_dma_mr,
>  	.get_link_layer = mlx5_ib_port_link_layer,
>  	.map_mr_sg = mlx5_ib_map_mr_sg,
> +	.map_mr_sg_pi = mlx5_ib_map_mr_sg_pi,
>  	.mmap = mlx5_ib_mmap,
>  	.modify_cq = mlx5_ib_modify_cq,
>  	.modify_device = mlx5_ib_modify_device,
> diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
> index 40eb8be482e4..07bac37c3450 100644
> --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
> +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
> @@ -587,6 +587,9 @@ struct mlx5_ib_mr {
>  	void			*descs;
>  	dma_addr_t		desc_map;
>  	int			ndescs;
> +	int			data_length;
> +	int			meta_ndescs;
> +	int			meta_length;
>  	int			max_descs;
>  	int			desc_size;
>  	int			access_mode;
> @@ -605,6 +608,7 @@ struct mlx5_ib_mr {
>  	int			access_flags; /* Needed for rereg MR */
>  
>  	struct mlx5_ib_mr      *parent;
> +	struct mlx5_ib_mr      *pi_mr; /* Needed for IB_MR_TYPE_INTEGRITY */
>  	atomic_t		num_leaf_free;
>  	wait_queue_head_t       q_leaf_free;
>  	struct mlx5_async_work  cb_work;
> @@ -1148,8 +1152,15 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
>  int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
>  struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
>  			       u32 max_num_sg, struct ib_udata *udata);
> +struct ib_mr *mlx5_ib_alloc_mr_integrity(struct ib_pd *pd,
> +					 u32 max_num_sg,
> +					 u32 max_num_meta_sg);
>  int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
>  		      unsigned int *sg_offset);
> +int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
> +			 int data_sg_nents, unsigned int *data_sg_offset,
> +			 struct scatterlist *meta_sg, int meta_sg_nents,
> +			 unsigned int *meta_sg_offset);
>  int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
>  			const struct ib_wc *in_wc, const struct ib_grh *in_grh,
>  			const struct ib_mad_hdr *in, size_t in_mad_size,
> diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
> index 5f09699fab98..6820d80c6a7f 100644
> --- a/drivers/infiniband/hw/mlx5/mr.c
> +++ b/drivers/infiniband/hw/mlx5/mr.c
> @@ -1639,16 +1639,22 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
>  
>  int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
>  {
> -	dereg_mr(to_mdev(ibmr->device), to_mmr(ibmr));
> +	struct mlx5_ib_mr *mmr = to_mmr(ibmr);
> +
> +	if (ibmr->type == IB_MR_TYPE_INTEGRITY)
> +		dereg_mr(to_mdev(mmr->pi_mr->ibmr.device), mmr->pi_mr);
> +
> +	dereg_mr(to_mdev(ibmr->device), mmr);
> +
>  	return 0;
>  }
>  
> -struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
> -			       u32 max_num_sg, struct ib_udata *udata)
> +static struct mlx5_ib_mr *mlx5_ib_alloc_pi_mr(struct ib_pd *pd,
> +				u32 max_num_sg, u32 max_num_meta_sg)
>  {
>  	struct mlx5_ib_dev *dev = to_mdev(pd->device);
>  	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
> -	int ndescs = ALIGN(max_num_sg, 4);
> +	int ndescs = ALIGN(max_num_sg + max_num_meta_sg, 4);
>  	struct mlx5_ib_mr *mr;
>  	void *mkc;
>  	u32 *in;
> @@ -1670,8 +1676,72 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
>  	MLX5_SET(mkc, mkc, qpn, 0xffffff);
>  	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
>  
> +	mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS;
> +
> +	err = mlx5_alloc_priv_descs(pd->device, mr,
> +				    ndescs, sizeof(struct mlx5_klm));
> +	if (err)
> +		goto err_free_in;
> +	mr->desc_size = sizeof(struct mlx5_klm);
> +	mr->max_descs = ndescs;
> +
> +	MLX5_SET(mkc, mkc, access_mode_1_0, mr->access_mode & 0x3);
> +	MLX5_SET(mkc, mkc, access_mode_4_2, (mr->access_mode >> 2) & 0x7);
> +	MLX5_SET(mkc, mkc, umr_en, 1);
> +
> +	mr->ibmr.pd = pd;
> +	mr->ibmr.device = pd->device;
> +	err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
> +	if (err)
> +		goto err_priv_descs;
> +
> +	mr->mmkey.type = MLX5_MKEY_MR;
> +	mr->ibmr.lkey = mr->mmkey.key;
> +	mr->ibmr.rkey = mr->mmkey.key;
> +	mr->umem = NULL;
> +	kfree(in);
> +
> +	return mr;
> +
> +err_priv_descs:
> +	mlx5_free_priv_descs(mr);
> +err_free_in:
> +	kfree(in);
> +err_free:
> +	kfree(mr);
> +	return ERR_PTR(err);
> +}
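
For context, each KLM descriptor here is 16 bytes, i.e. one octword
(my reading of the mlx5 headers, worth double-checking):

	struct mlx5_klm {
		__be32 bcount;	/* byte count covered by this entry */
		__be32 key;	/* lkey for the referenced buffer */
		__be64 va;	/* DMA address */
	};

so ndescs = ALIGN(max_num_sg + max_num_meta_sg, 4) reserves one KLM
per data/metadata SG entry, rounded up to a multiple of 4 entries.
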
> +
> +static struct ib_mr *__mlx5_ib_alloc_mr(struct ib_pd *pd,
> +					enum ib_mr_type mr_type, u32 max_num_sg,
> +					u32 max_num_meta_sg)
> +{
> +	struct mlx5_ib_dev *dev = to_mdev(pd->device);
> +	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
> +	int ndescs = ALIGN(max_num_sg, 4);
> +	struct mlx5_ib_mr *mr;
> +	void *mkc;
> +	u32 *in;
> +	int err;
> +
> +	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
> +	if (!mr)
> +		return ERR_PTR(-ENOMEM);
> +
> +	in = kzalloc(inlen, GFP_KERNEL);
> +	if (!in) {
> +		err = -ENOMEM;
> +		goto err_free;
> +	}
> +
> +	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
> +	MLX5_SET(mkc, mkc, free, 1);
> +	MLX5_SET(mkc, mkc, qpn, 0xffffff);
> +	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
> +
>  	if (mr_type == IB_MR_TYPE_MEM_REG) {
>  		mr->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
> +		MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
>  		MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
>  		err = mlx5_alloc_priv_descs(pd->device, mr,
>  					    ndescs, sizeof(struct mlx5_mtt));
> @@ -1682,6 +1752,7 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
>  		mr->max_descs = ndescs;
>  	} else if (mr_type == IB_MR_TYPE_SG_GAPS) {
>  		mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS;
> +		MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
>  
>  		err = mlx5_alloc_priv_descs(pd->device, mr,
>  					    ndescs, sizeof(struct mlx5_klm));
> @@ -1689,11 +1760,13 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
>  			goto err_free_in;
>  		mr->desc_size = sizeof(struct mlx5_klm);
>  		mr->max_descs = ndescs;
> -	} else if (mr_type == IB_MR_TYPE_SIGNATURE) {
> +	} else if (mr_type == IB_MR_TYPE_SIGNATURE ||
> +		   mr_type == IB_MR_TYPE_INTEGRITY) {
>  		u32 psv_index[2];
>  
>  		MLX5_SET(mkc, mkc, bsf_en, 1);
>  		MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE);
> +		MLX5_SET(mkc, mkc, translations_octword_size, 4);
>  		mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
>  		if (!mr->sig) {
>  			err = -ENOMEM;
> @@ -1714,6 +1787,14 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
>  		mr->sig->sig_err_exists = false;
>  		/* Next UMR, Arm SIGERR */
>  		++mr->sig->sigerr_count;
> +		if (mr_type == IB_MR_TYPE_INTEGRITY) {
> +			mr->pi_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg,
> +							max_num_meta_sg);
> +			if (IS_ERR(mr->pi_mr)) {
> +				err = PTR_ERR(mr->pi_mr);
> +				goto err_destroy_psv;
> +			}
> +		}
>  	} else {
>  		mlx5_ib_warn(dev, "Invalid mr type %d\n", mr_type);
>  		err = -EINVAL;
> @@ -1727,7 +1808,7 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
>  	mr->ibmr.device = pd->device;
>  	err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
>  	if (err)
> -		goto err_destroy_psv;
> +		goto err_free_pi_mr;
>  
>  	mr->mmkey.type = MLX5_MKEY_MR;
>  	mr->ibmr.lkey = mr->mmkey.key;
> @@ -1737,6 +1818,11 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
>  
>  	return &mr->ibmr;
>  
> +err_free_pi_mr:
> +	if (mr->pi_mr) {
> +		dereg_mr(to_mdev(mr->pi_mr->ibmr.device), mr->pi_mr);
> +		mr->pi_mr = NULL;
> +	}
>  err_destroy_psv:
>  	if (mr->sig) {
>  		if (mlx5_core_destroy_psv(dev->mdev,
> @@ -1758,6 +1844,19 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
>  	return ERR_PTR(err);
>  }
>  
> +struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
> +			       u32 max_num_sg, struct ib_udata *udata)
> +{
> +	return __mlx5_ib_alloc_mr(pd, mr_type, max_num_sg, 0);
> +}
> +
> +struct ib_mr *mlx5_ib_alloc_mr_integrity(struct ib_pd *pd,
> +					 u32 max_num_sg, u32 max_num_meta_sg)
> +{
> +	return __mlx5_ib_alloc_mr(pd, IB_MR_TYPE_INTEGRITY, max_num_sg,
> +				  max_num_meta_sg);
> +}
> +
>  struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
>  			       struct ib_udata *udata)
>  {
> @@ -1890,13 +1989,16 @@ static int
>  mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
>  		   struct scatterlist *sgl,
>  		   unsigned short sg_nents,
> -		   unsigned int *sg_offset_p)
> +		   unsigned int *sg_offset_p,
> +		   struct scatterlist *meta_sgl,
> +		   unsigned short meta_sg_nents,
> +		   unsigned int *meta_sg_offset_p)
>  {
>  	struct scatterlist *sg = sgl;
>  	struct mlx5_klm *klms = mr->descs;
>  	unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
>  	u32 lkey = mr->ibmr.pd->local_dma_lkey;
> -	int i;
> +	int i, j = 0;
>  
>  	mr->ibmr.iova = sg_dma_address(sg) + sg_offset;
>  	mr->ibmr.length = 0;
> @@ -1911,12 +2013,36 @@ mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
>  
>  		sg_offset = 0;
>  	}
> -	mr->ndescs = i;
>  
>  	if (sg_offset_p)
>  		*sg_offset_p = sg_offset;
>  
> -	return i;
> +	mr->ndescs = i;
> +	mr->data_length = mr->ibmr.length;
> +
> +	if (meta_sg_nents) {
> +		sg = meta_sgl;
> +		sg_offset = meta_sg_offset_p ? *meta_sg_offset_p : 0;
> +		for_each_sg(meta_sgl, sg, meta_sg_nents, j) {
> +			if (unlikely(i + j >= mr->max_descs))
> +				break;
> +			klms[i + j].va = cpu_to_be64(sg_dma_address(sg) +
> +						     sg_offset);
> +			klms[i + j].bcount = cpu_to_be32(sg_dma_len(sg) -
> +							 sg_offset);
> +			klms[i + j].key = cpu_to_be32(lkey);
> +			mr->ibmr.length += sg_dma_len(sg) - sg_offset;
> +
> +			sg_offset = 0;
> +		}
> +		if (meta_sg_offset_p)
> +			*meta_sg_offset_p = sg_offset;
> +
> +		mr->meta_ndescs = j;
> +		mr->meta_length = mr->ibmr.length - mr->data_length;
> +	}
> +
> +	return i + j;
>  }
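
So the resulting descriptor array, as I read the loops above, is the
data entries followed immediately by the metadata entries, all under
the ULP's local_dma_lkey:

	klms[0 .. i-1]    data SG entries      (mr->ndescs = i)
	klms[i .. i+j-1]  metadata SG entries  (mr->meta_ndescs = j)

with mr->ibmr.length covering both ranges and mr->meta_length just the
metadata tail.
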
>  
>  static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
> @@ -1933,6 +2059,41 @@ static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
>  	return 0;
>  }
>  
> +int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
> +			 int data_sg_nents, unsigned int *data_sg_offset,
> +			 struct scatterlist *meta_sg, int meta_sg_nents,
> +			 unsigned int *meta_sg_offset)
> +{
> +	struct mlx5_ib_mr *mr = to_mmr(ibmr);
> +	struct mlx5_ib_mr *pi_mr = mr->pi_mr;
> +	int n;
> +
> +	WARN_ON(ibmr->type != IB_MR_TYPE_INTEGRITY);
> +
> +	pi_mr->ndescs = 0;
> +	pi_mr->meta_ndescs = 0;
> +	pi_mr->meta_length = 0;
> +
> +	ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map,
> +				   pi_mr->desc_size * pi_mr->max_descs,
> +				   DMA_TO_DEVICE);
> +
> +	n = mlx5_ib_sg_to_klms(pi_mr, data_sg, data_sg_nents, data_sg_offset,
> +			       meta_sg, meta_sg_nents, meta_sg_offset);
> +
> +	/* This is zero-based memory region */
> +	pi_mr->ibmr.iova = 0;

Are you aware that Yuval enabled zero-based MRs in rdma-core with the
following patch?

https://marc.info/?l=linux-rdma&m=155919637918880&w=2

> +	ibmr->length = pi_mr->ibmr.length;
> +	ibmr->iova = pi_mr->ibmr.iova;
> +	ibmr->sig_attrs->meta_length = pi_mr->meta_length;
> +
> +	ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map,
> +				      pi_mr->desc_size * pi_mr->max_descs,
> +				      DMA_TO_DEVICE);
> +
> +	return n;
> +}
> +
>  int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
>  		      unsigned int *sg_offset)
>  {
> @@ -1946,7 +2107,8 @@ int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
>  				   DMA_TO_DEVICE);
>  
>  	if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
> -		n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset);
> +		n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset, NULL, 0,
> +				       NULL);
>  	else
>  		n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
>  				mlx5_set_page);
> -- 
> 2.16.3
> 


