On Tue, May 28, 2019 at 08:14:39PM +0300, Yuval Shaia wrote: > The virtual address that is registered is used as a base for any address > used later in post_recv and post_send operations. > > On a virtualised environment this is not correct. > > A guest cannot register its memory so hypervisor maps the guest physical > address to a host virtual address and register it with the HW. Later on, > at datapath phase, the guest fills the SGEs with addresses from its > address space. > > Since HW cannot access guest virtual address space an extra translation > is needed to map those addresses to be based on the host virtual address > that was registered with the HW. > > To avoid this, a logical separation between the address that is > registered and the address that is used as a offset at datapath phase is > needed. > > This separation is already implemented in the lower layer part > (ibv_cmd_reg_mr) but blocked at the API level. > > Fix it by introducing a new API function that accepts a address from > guest virtual address space as well > > Signed-off-by: Yuval Shaia <yuval.shaia@xxxxxxxxxx> > v0 -> v1: > * Accept comment from Jason and change reg_mr callback signature > * Add the new API to libibverbs/libibverbs.map.in (please review > carefully as i really not sure if this is how it should be done) > libibverbs/compat-1_0.c | 2 +- > libibverbs/driver.h | 2 +- > libibverbs/dummy_ops.c | 2 +- > libibverbs/libibverbs.map.in | 1 + > libibverbs/verbs.c | 23 ++++++++++++++++++++++- > libibverbs/verbs.h | 7 +++++++ > providers/bnxt_re/verbs.c | 6 +++--- > providers/bnxt_re/verbs.h | 2 +- > providers/cxgb3/iwch.h | 4 ++-- > providers/cxgb3/verbs.c | 9 +++++---- > providers/cxgb4/libcxgb4.h | 4 ++-- > providers/cxgb4/verbs.c | 9 +++++---- > providers/hfi1verbs/hfiverbs.h | 4 ++-- > providers/hfi1verbs/verbs.c | 8 ++++---- > providers/hns/hns_roce_u.h | 2 +- > providers/hns/hns_roce_u_verbs.c | 6 +++--- > providers/i40iw/i40iw_umain.h | 2 +- > providers/i40iw/i40iw_uverbs.c | 8 ++++---- > providers/ipathverbs/ipathverbs.h | 4 ++-- > providers/ipathverbs/verbs.c | 8 ++++---- > providers/mlx4/mlx4.h | 4 ++-- > providers/mlx4/verbs.c | 7 +++---- > providers/mlx5/mlx5.h | 4 ++-- > providers/mlx5/verbs.c | 7 +++---- > providers/mthca/ah.c | 3 ++- > providers/mthca/mthca.h | 4 ++-- > providers/mthca/verbs.c | 6 +++--- > providers/nes/nes_umain.h | 2 +- > providers/nes/nes_uverbs.c | 9 ++++----- > providers/ocrdma/ocrdma_main.h | 2 +- > providers/ocrdma/ocrdma_verbs.c | 10 ++++------ > providers/qedr/qelr_main.h | 2 +- > providers/qedr/qelr_verbs.c | 11 ++++------- > providers/qedr/qelr_verbs.h | 4 ++-- > providers/rxe/rxe.c | 6 +++--- > providers/vmw_pvrdma/pvrdma.h | 4 ++-- > providers/vmw_pvrdma/verbs.c | 7 +++---- > 37 files changed, 114 insertions(+), 91 deletions(-) > > diff --git a/libibverbs/compat-1_0.c b/libibverbs/compat-1_0.c > index 695f89de..fbcc7514 100644 > +++ b/libibverbs/compat-1_0.c > @@ -171,7 +171,7 @@ struct ibv_context_ops_1_0 { > struct ibv_pd * (*alloc_pd)(struct ibv_context *context); > int (*dealloc_pd)(struct ibv_pd *pd); > struct ibv_mr * (*reg_mr)(struct ibv_pd *pd, void *addr, size_t length, > - int access); > + uint64_t hca_va, int access); > int (*dereg_mr)(struct ibv_mr *mr); > struct ibv_cq * (*create_cq)(struct ibv_context *context, int cqe, > struct ibv_comp_channel *channel, This one can't change, it is ABI > diff --git a/libibverbs/driver.h b/libibverbs/driver.h > index e4d624b2..ef27259a 100644 > +++ b/libibverbs/driver.h > @@ -338,7 +338,7 @@ struct verbs_context_ops { > uint64_t dm_offset, size_t length, > unsigned int access); > struct ibv_mr *(*reg_mr)(struct ibv_pd *pd, void *addr, size_t length, > - int access); > + uint64_t hca_va, int access); > int (*req_notify_cq)(struct ibv_cq *cq, int solicited_only); > int (*rereg_mr)(struct verbs_mr *vmr, int flags, struct ibv_pd *pd, > void *addr, size_t length, int access); > diff --git a/libibverbs/dummy_ops.c b/libibverbs/dummy_ops.c > index c861c3a0..61a8fbdf 100644 > +++ b/libibverbs/dummy_ops.c > @@ -410,7 +410,7 @@ static struct ibv_mr *reg_dm_mr(struct ibv_pd *pd, struct ibv_dm *dm, > } > > static struct ibv_mr *reg_mr(struct ibv_pd *pd, void *addr, size_t length, > - int access) > + uint64_t hca_va, int access) > { > errno = ENOSYS; > return NULL; > diff --git a/libibverbs/libibverbs.map.in b/libibverbs/libibverbs.map.in > index 87a1b9fc..523fd424 100644 > +++ b/libibverbs/libibverbs.map.in > @@ -94,6 +94,7 @@ IBVERBS_1.1 { > ibv_query_srq; > ibv_rate_to_mbps; > ibv_reg_mr; > + ibv_reg_mr_virt_as; > ibv_register_driver; > ibv_rereg_mr; > ibv_resize_cq; > diff --git a/libibverbs/verbs.c b/libibverbs/verbs.c > index 1766b9f5..3390aef8 100644 > +++ b/libibverbs/verbs.c > @@ -312,7 +312,28 @@ LATEST_SYMVER_FUNC(ibv_reg_mr, 1_1, "IBVERBS_1.1", > if (ibv_dontfork_range(addr, length)) > return NULL; > > - mr = get_ops(pd->context)->reg_mr(pd, addr, length, access); > + mr = get_ops(pd->context)->reg_mr(pd, addr, length, (uint64_t) addr, > + access); > + if (mr) { > + mr->context = pd->context; > + mr->pd = pd; > + mr->addr = addr; > + mr->length = length; > + } else > + ibv_dofork_range(addr, length); > + > + return mr; > +} > + > +struct ibv_mr *ibv_reg_mr_virt_as(struct ibv_pd *pd, void *addr, size_t length, > + uint64_t hca_va, int access) > +{ > + struct ibv_mr *mr; > + > + if (ibv_dontfork_range(addr, length)) > + return NULL; > + > + mr = get_ops(pd->context)->reg_mr(pd, addr, length, hca_va, access); > if (mr) { > mr->context = pd->context; > mr->pd = pd; > diff --git a/libibverbs/verbs.h b/libibverbs/verbs.h > index cb2d8439..adbf991b 100644 > +++ b/libibverbs/verbs.h > @@ -2372,6 +2372,13 @@ static inline int ibv_close_xrcd(struct ibv_xrcd *xrcd) > struct ibv_mr *ibv_reg_mr(struct ibv_pd *pd, void *addr, > size_t length, int access); > > +/** > + * ibv_reg_mr_virt_as - Register a memory region with a virtual offset > + * address > + */ > +struct ibv_mr *ibv_reg_mr_virt_as(struct ibv_pd *pd, void *addr, size_t length, > + uint64_t hca_va, int access); > + Needs a man page update too Other stuff looks right to me Jason