[RFC] verbs: Introduce a new reg_mr API for virtual address space

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The virtual address that is registered is used as a base for any address
used later in post_recv and post_send operations.

On a virtualised environment this is not correct.

A guest cannot register its memory so hypervisor maps the guest physical
address to a host virtual address and register it with the HW. Later on,
at datapath phase, the guest fills the SGEs with addresses from its
address space.
Since HW cannot access guest virtual address space an extra translation
is needed map those addresses to be based on the host virtual address
that was registered with the HW.

To avoid this, a logical separation between the address that is
registered and the address that is used as a offset at datapath phase is
needed.
This separation is already implemented in the lower layer part
(ibv_cmd_reg_mr) but blocked at the API level.

Fix it by introducing a new API function that accepts a address from
guest virtual address space as well.

Signed-off-by: Yuval Shaia <yuval.shaia@xxxxxxxxxx>
---
 libibverbs/driver.h    |  3 +++
 libibverbs/dummy_ops.c | 10 ++++++++++
 libibverbs/verbs.h     | 26 ++++++++++++++++++++++++++
 providers/rxe/rxe.c    | 16 ++++++++++++----
 4 files changed, 51 insertions(+), 4 deletions(-)

diff --git a/libibverbs/driver.h b/libibverbs/driver.h
index e4d624b2..73bc10e6 100644
--- a/libibverbs/driver.h
+++ b/libibverbs/driver.h
@@ -339,6 +339,9 @@ struct verbs_context_ops {
 				    unsigned int access);
 	struct ibv_mr *(*reg_mr)(struct ibv_pd *pd, void *addr, size_t length,
 				 int access);
+	struct ibv_mr *(*reg_mr_virt_as)(struct ibv_pd *pd, void *addr,
+					 size_t length, uint64_t hca_va,
+					 int access);
 	int (*req_notify_cq)(struct ibv_cq *cq, int solicited_only);
 	int (*rereg_mr)(struct verbs_mr *vmr, int flags, struct ibv_pd *pd,
 			void *addr, size_t length, int access);
diff --git a/libibverbs/dummy_ops.c b/libibverbs/dummy_ops.c
index c861c3a0..aab61a17 100644
--- a/libibverbs/dummy_ops.c
+++ b/libibverbs/dummy_ops.c
@@ -416,6 +416,14 @@ static struct ibv_mr *reg_mr(struct ibv_pd *pd, void *addr, size_t length,
 	return NULL;
 }
 
+static struct ibv_mr *reg_mr_virt_as(struct ibv_pd *pd, void *addr,
+				     size_t length, uint64_t hca_va,
+				     int access)
+{
+	errno = ENOSYS;
+	return NULL;
+}
+
 static int req_notify_cq(struct ibv_cq *cq, int solicited_only)
 {
 	return ENOSYS;
@@ -508,6 +516,7 @@ const struct verbs_context_ops verbs_dummy_ops = {
 	read_counters,
 	reg_dm_mr,
 	reg_mr,
+	reg_mr_virt_as,
 	req_notify_cq,
 	rereg_mr,
 	resize_cq,
@@ -623,6 +632,7 @@ void verbs_set_ops(struct verbs_context *vctx,
 	SET_PRIV_OP(ctx, query_srq);
 	SET_OP(vctx, reg_dm_mr);
 	SET_PRIV_OP(ctx, reg_mr);
+	SET_OP(vctx, reg_mr_virt_as);
 	SET_OP(ctx, req_notify_cq);
 	SET_PRIV_OP(ctx, rereg_mr);
 	SET_PRIV_OP(ctx, resize_cq);
diff --git a/libibverbs/verbs.h b/libibverbs/verbs.h
index cb2d8439..8bcc8388 100644
--- a/libibverbs/verbs.h
+++ b/libibverbs/verbs.h
@@ -2037,6 +2037,9 @@ struct verbs_context {
 	struct ibv_mr *(*reg_dm_mr)(struct ibv_pd *pd, struct ibv_dm *dm,
 				    uint64_t dm_offset, size_t length,
 				    unsigned int access);
+	struct ibv_mr *(*reg_mr_virt_as)(struct ibv_pd *pd, void *addr,
+					 size_t length, uint64_t hca_va,
+					 int access);
 	struct ibv_dm *(*alloc_dm)(struct ibv_context *context,
 				   struct ibv_alloc_dm_attr *attr);
 	int (*free_dm)(struct ibv_dm *dm);
@@ -2574,6 +2577,29 @@ struct ibv_mr *ibv_reg_dm_mr(struct ibv_pd *pd, struct ibv_dm *dm,
 	return vctx->reg_dm_mr(pd, dm, dm_offset, length, access);
 }
 
+/**
+ * ibv_reg_mr_virt_as - Register MR for virtual address
+ * @pd - The PD to associated this MR with
+ * @addr - Address to register
+ * @length - Number of bytes to register
+ * @hca_va - Virtual Address
+ * @access - memory region access flags
+ */
+static inline
+struct ibv_mr *ibv_reg_mr_virt_as(struct ibv_pd *pd, void *addr, size_t length,
+				  uint64_t hca_va, int access)
+{
+	struct verbs_context *vctx = verbs_get_ctx_op(pd->context,
+						      reg_mr_virt_as);
+
+	if (!vctx) {
+		errno = ENOSYS;
+		return NULL;
+	}
+
+	return vctx->reg_mr_virt_as(pd, addr, length, hca_va, access);
+}
+
 /**
  * ibv_create_cq - Create a completion queue
  * @context - Context CQ will be attached to
diff --git a/providers/rxe/rxe.c b/providers/rxe/rxe.c
index 909c3f7b..e7246da9 100644
--- a/providers/rxe/rxe.c
+++ b/providers/rxe/rxe.c
@@ -122,8 +122,9 @@ static int rxe_dealloc_pd(struct ibv_pd *pd)
 	return ret;
 }
 
-static struct ibv_mr *rxe_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
-				 int access)
+static struct ibv_mr *rxe_reg_mr_virt_as(struct ibv_pd *pd, void *addr,
+					 size_t length, uint64_t hca_va,
+					 int access)
 {
 	struct verbs_mr *vmr;
 	struct ibv_reg_mr cmd;
@@ -134,8 +135,8 @@ static struct ibv_mr *rxe_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
 	if (!vmr)
 		return NULL;
 
-	ret = ibv_cmd_reg_mr(pd, addr, length, (uintptr_t)addr, access, vmr,
-			     &cmd, sizeof cmd, &resp, sizeof resp);
+	ret = ibv_cmd_reg_mr(pd, addr, length, hca_va, access, vmr, &cmd,
+			     sizeof(cmd), &resp, sizeof(resp));
 	if (ret) {
 		free(vmr);
 		return NULL;
@@ -144,6 +145,12 @@ static struct ibv_mr *rxe_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
 	return &vmr->ibv_mr;
 }
 
+static struct ibv_mr *rxe_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
+				 int access)
+{
+	return rxe_reg_mr_virt_as(pd, addr, length, (uint64_t) addr, access);
+}
+
 static int rxe_dereg_mr(struct verbs_mr *vmr)
 {
 	int ret;
@@ -836,6 +843,7 @@ static const struct verbs_context_ops rxe_ctx_ops = {
 	.alloc_pd = rxe_alloc_pd,
 	.dealloc_pd = rxe_dealloc_pd,
 	.reg_mr = rxe_reg_mr,
+	.reg_mr_virt_as = rxe_reg_mr_virt_as,
 	.dereg_mr = rxe_dereg_mr,
 	.create_cq = rxe_create_cq,
 	.poll_cq = rxe_poll_cq,
-- 
2.20.1




[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux