Re: [PATCH 06/20] RDMA/mlx5: Implement mlx5_ib_map_mr_sg_pi and mlx5_ib_alloc_mr_integrity

On Sun, Jun 02, 2019 at 05:22:18PM +0300, Max Gurtovoy wrote:
> 
> On 6/2/2019 12:13 PM, Shamir Rabinovitch wrote:
> > On Thu, May 30, 2019 at 04:25:17PM +0300, Max Gurtovoy wrote:
> > > mlx5_ib_map_mr_sg_pi() will map the DMA-mapped PI and data SG lists to
> > > the mlx5 memory region prior to the registration operation. In the new
> > > API, the mlx5 driver will allocate an internal memory region for the
> > > UMR operation to register both PI and data SG lists. The internal MR
> > > will use KLM mode in order to map 2 (possibly non-contiguous/non-aligned)
> > > SG lists using 1 memory key. In the new API, each ULP will use 1 memory
> > > region for the signature operation (instead of 3 in the old API). This
> > > memory region will have a key that will be exposed to the remote server
> > > to perform RDMA operations. The internal memory key that maps the SG
> > > lists will stay private.
> > > 
> > > Signed-off-by: Max Gurtovoy <maxg@xxxxxxxxxxxx>
> > > Signed-off-by: Israel Rukshin <israelr@xxxxxxxxxxxx>
> > > ---
> > >   drivers/infiniband/hw/mlx5/main.c    |   2 +
> > >   drivers/infiniband/hw/mlx5/mlx5_ib.h |  11 +++
> > >   drivers/infiniband/hw/mlx5/mr.c      | 184 ++++++++++++++++++++++++++++++++---
> > >   3 files changed, 186 insertions(+), 11 deletions(-)
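
A quick aside for the archives: based on the ops wired up below, the
intended ULP flow looks roughly like this. This is only a sketch -- it
assumes the ib_alloc_mr_integrity()/ib_map_mr_sg_pi() core wrappers added
earlier in the series keep the same shape as the driver ops shown here
(the final wrappers may take extra arguments, e.g. a page size), and the
error handling is trimmed:

	struct ib_mr *mr;
	int n;

	/* One MR covers both the data and the protection information. */
	mr = ib_alloc_mr_integrity(pd, max_num_sg, max_num_meta_sg);
	if (IS_ERR(mr))
		return PTR_ERR(mr);

	/*
	 * Map both DMA-mapped SG lists under the single exposed key; the
	 * internal KLM mkey stays private. Returns the number of SG
	 * entries that were mapped.
	 */
	n = ib_map_mr_sg_pi(mr, data_sg, data_sg_nents, NULL,
			    meta_sg, meta_sg_nents, NULL);
	if (n < data_sg_nents + meta_sg_nents)
		goto err_dereg;
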
> > > 
> > > diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
> > > index abac70ad5c7c..b6588cdef1cf 100644
> > > --- a/drivers/infiniband/hw/mlx5/main.c
> > > +++ b/drivers/infiniband/hw/mlx5/main.c
> > > @@ -6126,6 +6126,7 @@ static void mlx5_ib_stage_flow_db_cleanup(struct mlx5_ib_dev *dev)
> > >   static const struct ib_device_ops mlx5_ib_dev_ops = {
> > >   	.add_gid = mlx5_ib_add_gid,
> > >   	.alloc_mr = mlx5_ib_alloc_mr,
> > > +	.alloc_mr_integrity = mlx5_ib_alloc_mr_integrity,
> > >   	.alloc_pd = mlx5_ib_alloc_pd,
> > >   	.alloc_ucontext = mlx5_ib_alloc_ucontext,
> > >   	.attach_mcast = mlx5_ib_mcg_attach,
> > > @@ -6155,6 +6156,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
> > >   	.get_dma_mr = mlx5_ib_get_dma_mr,
> > >   	.get_link_layer = mlx5_ib_port_link_layer,
> > >   	.map_mr_sg = mlx5_ib_map_mr_sg,
> > > +	.map_mr_sg_pi = mlx5_ib_map_mr_sg_pi,
> > >   	.mmap = mlx5_ib_mmap,
> > >   	.modify_cq = mlx5_ib_modify_cq,
> > >   	.modify_device = mlx5_ib_modify_device,
> > > diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
> > > index 40eb8be482e4..07bac37c3450 100644
> > > --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
> > > +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
> > > @@ -587,6 +587,9 @@ struct mlx5_ib_mr {
> > >   	void			*descs;
> > >   	dma_addr_t		desc_map;
> > >   	int			ndescs;
> > > +	int			data_length;
> > > +	int			meta_ndescs;
> > > +	int			meta_length;
> > >   	int			max_descs;
> > >   	int			desc_size;
> > >   	int			access_mode;
> > > @@ -605,6 +608,7 @@ struct mlx5_ib_mr {
> > >   	int			access_flags; /* Needed for rereg MR */
> > >   	struct mlx5_ib_mr      *parent;
> > > +	struct mlx5_ib_mr      *pi_mr; /* Needed for IB_MR_TYPE_INTEGRITY */
> > >   	atomic_t		num_leaf_free;
> > >   	wait_queue_head_t       q_leaf_free;
> > >   	struct mlx5_async_work  cb_work;
> > > @@ -1148,8 +1152,15 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
> > >   int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
> > >   struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
> > >   			       u32 max_num_sg, struct ib_udata *udata);
> > > +struct ib_mr *mlx5_ib_alloc_mr_integrity(struct ib_pd *pd,
> > > +					 u32 max_num_sg,
> > > +					 u32 max_num_meta_sg);
> > >   int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
> > >   		      unsigned int *sg_offset);
> > > +int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
> > > +			 int data_sg_nents, unsigned int *data_sg_offset,
> > > +			 struct scatterlist *meta_sg, int meta_sg_nents,
> > > +			 unsigned int *meta_sg_offset);
> > >   int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
> > >   			const struct ib_wc *in_wc, const struct ib_grh *in_grh,
> > >   			const struct ib_mad_hdr *in, size_t in_mad_size,
> > > diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
> > > index 5f09699fab98..6820d80c6a7f 100644
> > > --- a/drivers/infiniband/hw/mlx5/mr.c
> > > +++ b/drivers/infiniband/hw/mlx5/mr.c
> > > @@ -1639,16 +1639,22 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
> > >   int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
> > >   {
> > > -	dereg_mr(to_mdev(ibmr->device), to_mmr(ibmr));
> > > +	struct mlx5_ib_mr *mmr = to_mmr(ibmr);
> > > +
> > > +	if (ibmr->type == IB_MR_TYPE_INTEGRITY)
> > > +		dereg_mr(to_mdev(mmr->pi_mr->ibmr.device), mmr->pi_mr);
> > > +
> > > +	dereg_mr(to_mdev(ibmr->device), mmr);
> > > +
> > >   	return 0;
> > >   }
> > > -struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
> > > -			       u32 max_num_sg, struct ib_udata *udata)
> > > +static struct mlx5_ib_mr *mlx5_ib_alloc_pi_mr(struct ib_pd *pd,
> > > +				u32 max_num_sg, u32 max_num_meta_sg)
> > >   {
> > >   	struct mlx5_ib_dev *dev = to_mdev(pd->device);
> > >   	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
> > > -	int ndescs = ALIGN(max_num_sg, 4);
> > > +	int ndescs = ALIGN(max_num_sg + max_num_meta_sg, 4);
> > >   	struct mlx5_ib_mr *mr;
> > >   	void *mkc;
> > >   	u32 *in;
> > > @@ -1670,8 +1676,72 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
> > >   	MLX5_SET(mkc, mkc, qpn, 0xffffff);
> > >   	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
> > > +	mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS;
> > > +
> > > +	err = mlx5_alloc_priv_descs(pd->device, mr,
> > > +				    ndescs, sizeof(struct mlx5_klm));
> > > +	if (err)
> > > +		goto err_free_in;
> > > +	mr->desc_size = sizeof(struct mlx5_klm);
> > > +	mr->max_descs = ndescs;
> > > +
> > > +	MLX5_SET(mkc, mkc, access_mode_1_0, mr->access_mode & 0x3);
> > > +	MLX5_SET(mkc, mkc, access_mode_4_2, (mr->access_mode >> 2) & 0x7);
> > > +	MLX5_SET(mkc, mkc, umr_en, 1);
> > > +
> > > +	mr->ibmr.pd = pd;
> > > +	mr->ibmr.device = pd->device;
> > > +	err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
> > > +	if (err)
> > > +		goto err_priv_descs;
> > > +
> > > +	mr->mmkey.type = MLX5_MKEY_MR;
> > > +	mr->ibmr.lkey = mr->mmkey.key;
> > > +	mr->ibmr.rkey = mr->mmkey.key;
> > > +	mr->umem = NULL;
> > > +	kfree(in);
> > > +
> > > +	return mr;
> > > +
> > > +err_priv_descs:
> > > +	mlx5_free_priv_descs(mr);
> > > +err_free_in:
> > > +	kfree(in);
> > > +err_free:
> > > +	kfree(mr);
> > > +	return ERR_PTR(err);
> > > +}
> > > +
> > > +static struct ib_mr *__mlx5_ib_alloc_mr(struct ib_pd *pd,
> > > +					enum ib_mr_type mr_type, u32 max_num_sg,
> > > +					u32 max_num_meta_sg)
> > > +{
> > > +	struct mlx5_ib_dev *dev = to_mdev(pd->device);
> > > +	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
> > > +	int ndescs = ALIGN(max_num_sg, 4);
> > > +	struct mlx5_ib_mr *mr;
> > > +	void *mkc;
> > > +	u32 *in;
> > > +	int err;
> > > +
> > > +	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
> > > +	if (!mr)
> > > +		return ERR_PTR(-ENOMEM);
> > > +
> > > +	in = kzalloc(inlen, GFP_KERNEL);
> > > +	if (!in) {
> > > +		err = -ENOMEM;
> > > +		goto err_free;
> > > +	}
> > > +
> > > +	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
> > > +	MLX5_SET(mkc, mkc, free, 1);
> > > +	MLX5_SET(mkc, mkc, qpn, 0xffffff);
> > > +	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
> > > +
> > >   	if (mr_type == IB_MR_TYPE_MEM_REG) {
> > >   		mr->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
> > > +		MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
> > >   		MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
> > >   		err = mlx5_alloc_priv_descs(pd->device, mr,
> > >   					    ndescs, sizeof(struct mlx5_mtt));
> > > @@ -1682,6 +1752,7 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
> > >   		mr->max_descs = ndescs;
> > >   	} else if (mr_type == IB_MR_TYPE_SG_GAPS) {
> > >   		mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS;
> > > +		MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
> > >   		err = mlx5_alloc_priv_descs(pd->device, mr,
> > >   					    ndescs, sizeof(struct mlx5_klm));
> > > @@ -1689,11 +1760,13 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
> > >   			goto err_free_in;
> > >   		mr->desc_size = sizeof(struct mlx5_klm);
> > >   		mr->max_descs = ndescs;
> > > -	} else if (mr_type == IB_MR_TYPE_SIGNATURE) {
> > > +	} else if (mr_type == IB_MR_TYPE_SIGNATURE ||
> > > +		   mr_type == IB_MR_TYPE_INTEGRITY) {
> > >   		u32 psv_index[2];
> > >   		MLX5_SET(mkc, mkc, bsf_en, 1);
> > >   		MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE);
> > > +		MLX5_SET(mkc, mkc, translations_octword_size, 4);
> > >   		mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
> > >   		if (!mr->sig) {
> > >   			err = -ENOMEM;
> > > @@ -1714,6 +1787,14 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
> > >   		mr->sig->sig_err_exists = false;
> > >   		/* Next UMR, Arm SIGERR */
> > >   		++mr->sig->sigerr_count;
> > > +		if (mr_type == IB_MR_TYPE_INTEGRITY) {
> > > +			mr->pi_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg,
> > > +							max_num_meta_sg);
> > > +			if (IS_ERR(mr->pi_mr)) {
> > > +				err = PTR_ERR(mr->pi_mr);
> > > +				goto err_destroy_psv;
> > > +			}
> > > +		}
> > >   	} else {
> > >   		mlx5_ib_warn(dev, "Invalid mr type %d\n", mr_type);
> > >   		err = -EINVAL;
> > > @@ -1727,7 +1808,7 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
> > >   	mr->ibmr.device = pd->device;
> > >   	err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
> > >   	if (err)
> > > -		goto err_destroy_psv;
> > > +		goto err_free_pi_mr;
> > >   	mr->mmkey.type = MLX5_MKEY_MR;
> > >   	mr->ibmr.lkey = mr->mmkey.key;
> > > @@ -1737,6 +1818,11 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
> > >   	return &mr->ibmr;
> > > +err_free_pi_mr:
> > > +	if (mr->pi_mr) {
> > > +		dereg_mr(to_mdev(mr->pi_mr->ibmr.device), mr->pi_mr);
> > > +		mr->pi_mr = NULL;
> > > +	}
> > >   err_destroy_psv:
> > >   	if (mr->sig) {
> > >   		if (mlx5_core_destroy_psv(dev->mdev,
> > > @@ -1758,6 +1844,19 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
> > >   	return ERR_PTR(err);
> > >   }
> > > +struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
> > > +			       u32 max_num_sg, struct ib_udata *udata)
> > > +{
> > > +	return __mlx5_ib_alloc_mr(pd, mr_type, max_num_sg, 0);
> > > +}
> > > +
> > > +struct ib_mr *mlx5_ib_alloc_mr_integrity(struct ib_pd *pd,
> > > +					 u32 max_num_sg, u32 max_num_meta_sg)
> > > +{
> > > +	return __mlx5_ib_alloc_mr(pd, IB_MR_TYPE_INTEGRITY, max_num_sg,
> > > +				  max_num_meta_sg);
> > > +}
> > > +
> > >   struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
> > >   			       struct ib_udata *udata)
> > >   {
> > > @@ -1890,13 +1989,16 @@ static int
> > >   mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
> > >   		   struct scatterlist *sgl,
> > >   		   unsigned short sg_nents,
> > > -		   unsigned int *sg_offset_p)
> > > +		   unsigned int *sg_offset_p,
> > > +		   struct scatterlist *meta_sgl,
> > > +		   unsigned short meta_sg_nents,
> > > +		   unsigned int *meta_sg_offset_p)
> > >   {
> > >   	struct scatterlist *sg = sgl;
> > >   	struct mlx5_klm *klms = mr->descs;
> > >   	unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
> > >   	u32 lkey = mr->ibmr.pd->local_dma_lkey;
> > > -	int i;
> > > +	int i, j = 0;
> > >   	mr->ibmr.iova = sg_dma_address(sg) + sg_offset;
> > >   	mr->ibmr.length = 0;
> > > @@ -1911,12 +2013,36 @@ mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
> > >   		sg_offset = 0;
> > >   	}
> > > -	mr->ndescs = i;
> > >   	if (sg_offset_p)
> > >   		*sg_offset_p = sg_offset;
> > > -	return i;
> > > +	mr->ndescs = i;
> > > +	mr->data_length = mr->ibmr.length;
> > > +
> > > +	if (meta_sg_nents) {
> > > +		sg = meta_sgl;
> > > +		sg_offset = meta_sg_offset_p ? *meta_sg_offset_p : 0;
> > > +		for_each_sg(meta_sgl, sg, meta_sg_nents, j) {
> > > +			if (unlikely(i + j >= mr->max_descs))
> > > +				break;
> > > +			klms[i + j].va = cpu_to_be64(sg_dma_address(sg) +
> > > +						     sg_offset);
> > > +			klms[i + j].bcount = cpu_to_be32(sg_dma_len(sg) -
> > > +							 sg_offset);
> > > +			klms[i + j].key = cpu_to_be32(lkey);
> > > +			mr->ibmr.length += sg_dma_len(sg) - sg_offset;
> > > +
> > > +			sg_offset = 0;
> > > +		}
> > > +		if (meta_sg_offset_p)
> > > +			*meta_sg_offset_p = sg_offset;
> > > +
> > > +		mr->meta_ndescs = j;
> > > +		mr->meta_length = mr->ibmr.length - mr->data_length;
> > > +	}
> > > +
> > > +	return i + j;
> > >   }
> > >   static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
> > > @@ -1933,6 +2059,41 @@ static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
> > >   	return 0;
> > >   }
> > > +int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
> > > +			 int data_sg_nents, unsigned int *data_sg_offset,
> > > +			 struct scatterlist *meta_sg, int meta_sg_nents,
> > > +			 unsigned int *meta_sg_offset)
> > > +{
> > > +	struct mlx5_ib_mr *mr = to_mmr(ibmr);
> > > +	struct mlx5_ib_mr *pi_mr = mr->pi_mr;
> > > +	int n;
> > > +
> > > +	WARN_ON(ibmr->type != IB_MR_TYPE_INTEGRITY);
> > > +
> > > +	pi_mr->ndescs = 0;
> > > +	pi_mr->meta_ndescs = 0;
> > > +	pi_mr->meta_length = 0;
> > > +
> > > +	ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map,
> > > +				   pi_mr->desc_size * pi_mr->max_descs,
> > > +				   DMA_TO_DEVICE);
> > > +
> > > +	n = mlx5_ib_sg_to_klms(pi_mr, data_sg, data_sg_nents, data_sg_offset,
> > > +			       meta_sg, meta_sg_nents, meta_sg_offset);
> > > +
> > > +	/* This is zero-based memory region */
> > > +	pi_mr->ibmr.iova = 0;
> > Are you aware that Yuval enabled zero-based MRs in rdma-core with the
> > following patch?
> > 
> > https://marc.info/?l=linux-rdma&m=155919637918880&w=2
> 
> Well, AFAIU, his API uses a user-space reg-mr that maps to
> device->ops.reg_user_mr.
> 
> This series is kernel only. In user-land there is no runtime MR mapping;
> all the MTTs are set during reg-mr.
> 
> In the kernel we just allocate the mkey tables and update the addresses
> at runtime using a UMR operation.
> 
> What is your concern here?
> 
> 
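
To be sure I read the runtime-update point correctly: per I/O the private
descriptor table is rewritten and then pushed to the HCA with a UMR-backed
registration, something like the sketch below. IB_WR_REG_MR_INTEGRITY is
the work request name I understand a later patch in this series introduces,
so treat the names here as assumptions:

	struct ib_reg_wr reg_wr = {};
	int ret;

	/*
	 * ib_map_mr_sg_pi() has already rewritten the private KLM table
	 * for this I/O; now hand the new mapping to the HCA.
	 */
	reg_wr.wr.opcode = IB_WR_REG_MR_INTEGRITY;
	reg_wr.mr = mr;
	reg_wr.key = mr->rkey;
	reg_wr.access = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ |
			IB_ACCESS_REMOTE_WRITE;
	ret = ib_post_send(qp, &reg_wr.wr, NULL);
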

Reading this comment from a prior patch in this series gave the impression
that you deal with user MRs: "Also introduce new mr types IB_MR_TYPE_USER".

If we need (user) zero-based MRs, Yuval's patch (rdma-core) can help.

Just wanted to know if you are aware of this change.
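
For completeness, this is how I read the internal pi_mr layout that
mlx5_ib_sg_to_klms() builds (zero-based, so an offset into the key is a
plain byte offset into the concatenated lists):

	/*
	 * klm[0 .. ndescs-1]                  data SG entries (data_length)
	 * klm[ndescs .. ndescs+meta_ndescs-1] meta SG entries (meta_length)
	 *
	 * Every KLM points through pd->local_dma_lkey, and iova = 0, so
	 * byte 0 of the mkey is byte 0 of the data.
	 */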



