From: Matan Barak <matanb@xxxxxxxxxxxx> Memory re-registeration is a feature that enables one to change the attributes of a memory region, including PD, translation (address and length) and access flags. Signed-off-by: Matan Barak <matanb@xxxxxxxxxxxx> Signed-off-by: Or Gerlitz <ogerlitz@xxxxxxxxxxxx> --- include/infiniband/driver.h | 5 +++ include/infiniband/kern-abi.h | 20 +++++++++++ include/infiniband/verbs.h | 11 +++++- man/ibv_rereg_mr.3 | 72 +++++++++++++++++++++++++++++++++++++++++ src/cmd.c | 29 ++++++++++++++++ src/libibverbs.map | 1 + src/verbs.c | 55 +++++++++++++++++++++++++++++++ 7 files changed, 191 insertions(+), 2 deletions(-) create mode 100644 man/ibv_rereg_mr.3 diff --git a/include/infiniband/driver.h b/include/infiniband/driver.h index 5cc092b..4aa2a03 100644 --- a/include/infiniband/driver.h +++ b/include/infiniband/driver.h @@ -128,6 +128,11 @@ int ibv_cmd_reg_mr(struct ibv_pd *pd, void *addr, size_t length, struct ibv_mr *mr, struct ibv_reg_mr *cmd, size_t cmd_size, struct ibv_reg_mr_resp *resp, size_t resp_size); +int ibv_cmd_rereg_mr(struct ibv_mr *mr, uint32_t flags, void *addr, + size_t length, uint64_t hca_va, int access, + struct ibv_pd *pd, struct ibv_rereg_mr *cmd, + size_t cmd_sz, struct ibv_rereg_mr_resp *resp, + size_t resp_sz); int ibv_cmd_dereg_mr(struct ibv_mr *mr); int ibv_cmd_create_cq(struct ibv_context *context, int cqe, struct ibv_comp_channel *channel, diff --git a/include/infiniband/kern-abi.h b/include/infiniband/kern-abi.h index 20812c3..6001a7e 100644 --- a/include/infiniband/kern-abi.h +++ b/include/infiniband/kern-abi.h @@ -333,6 +333,26 @@ struct ibv_reg_mr_resp { __u32 rkey; }; +struct ibv_rereg_mr { + __u32 command; + __u16 in_words; + __u16 out_words; + __u64 response; + __u32 mr_handle; + __u32 flags; + __u64 start; + __u64 length; + __u64 hca_va; + __u32 pd_handle; + __u32 access_flags; + __u64 driver_data[0]; +}; + +struct ibv_rereg_mr_resp { + __u32 lkey; + __u32 rkey; +}; + struct ibv_dereg_mr { __u32 command; __u16 in_words; diff --git a/include/infiniband/verbs.h b/include/infiniband/verbs.h index 045a42b..4b30880 100644 --- a/include/infiniband/verbs.h +++ b/include/infiniband/verbs.h @@ -342,7 +342,8 @@ enum ibv_rereg_mr_flags { IBV_REREG_MR_CHANGE_TRANSLATION = (1 << 0), IBV_REREG_MR_CHANGE_PD = (1 << 1), IBV_REREG_MR_CHANGE_ACCESS = (1 << 2), - IBV_REREG_MR_KEEP_VALID = (1 << 3) + IBV_REREG_MR_KEEP_VALID = (1 << 3), + IBV_REREG_MR_FLAGS_SUPPORTED = ((IBV_REREG_MR_KEEP_VALID << 1) - 1) }; struct ibv_mr { @@ -886,7 +887,7 @@ struct ibv_context_ops { int (*dealloc_pd)(struct ibv_pd *pd); struct ibv_mr * (*reg_mr)(struct ibv_pd *pd, void *addr, size_t length, int access); - struct ibv_mr * (*rereg_mr)(struct ibv_mr *mr, + int (*rereg_mr)(struct ibv_mr *mr, int flags, struct ibv_pd *pd, void *addr, size_t length, @@ -1162,6 +1163,12 @@ struct ibv_mr *ibv_reg_mr(struct ibv_pd *pd, void *addr, size_t length, int access); /** + * ibv_rereg_mr - Re-Register a memory region + */ +int ibv_rereg_mr(struct ibv_mr *mr, int flags, + struct ibv_pd *pd, void *addr, + size_t length, uint64_t access); +/** * ibv_dereg_mr - Deregister a memory region */ int ibv_dereg_mr(struct ibv_mr *mr); diff --git a/man/ibv_rereg_mr.3 b/man/ibv_rereg_mr.3 new file mode 100644 index 0000000..b13f610 --- /dev/null +++ b/man/ibv_rereg_mr.3 @@ -0,0 +1,72 @@ +.\" -*- nroff -*- +.\" +.TH IBV_REREG_MR 3 2014-08-27 libibverbs "Libibverbs Programmer's Manual" +.SH "NAME" +ibv_rereg_mr \- re-register a memory region (MR) +.SH "SYNOPSIS" +.nf +.B #include <infiniband/verbs.h> +.sp +.BI "int ibv_rereg_mr(struct ibv_mr " "*mr" ", int " " flags" , +.BI " struct ibv_pd * " "pd" ", void " " *addr", +.BI " size_t " " length" ", uint64_t " " access", +.BI " struct ibv_rereg_mr_attr *" " attr"); +.fi +.fi +.SH "DESCRIPTION" +.B ibv_rereg_mr() +Modifies the attributes of an existing memory region (MR) +.I mr\fR. +Conceptually, this call performs the functions deregister memory region +followed by register memory region. Where possible, +resources are reused instead of deallocated and reallocated. +.PP +.I flags\fR +is a bit-mask used to indicate which of the following properties of the memory region are being modified. Flags should be a combination (bit field) of: +.PP +.TP +.B IBV_REREG_MR_CHANGE_TRANSLATION \fR Change translation (location and length) +.TP +.B IBV_REREG_MR_CHANGE_PD \fR Change protection domain +.TP +.B IBV_REREG_MR_CHANGE_ACCESS \fR Change access flags +.PP +When +.B IBV_REREG_MR_CHANGE_PD +is used, +.I pd\fR +represents the new PD this MR should be registered to. +.br +When +.B IBV_REREG_MR_CHANGE_TRANSLATION +is used, +.I addr\fR. +represents the virtual address (user-space pointer) of the new MR, while +.I length\fR +represents its length. +.PP +The access and other flags are represented in the field +.I access\fR. +This field describes the desired memory protection attributes; it is either 0 or the bitwise OR of one or more of the following flags: +.PP +.TP +.B IBV_ACCESS_LOCAL_WRITE \fR Enable Local Write Access +.TP +.B IBV_ACCESS_REMOTE_WRITE \fR Enable Remote Write Access +.TP +.B IBV_ACCESS_REMOTE_READ\fR Enable Remote Read Access +.TP +.I attr\fR +is available for future extensions. +.SH "RETURN VALUE" +.B ibv_rereg_mr() +returns 0 on success, or the value of errno on failure (which indicates the failure reason). +.SH "NOTES" +If the memory re-registration call failed, the MR can't be used. +Even on a failure, the user still needs to call ibv_dereg_mr on this MR. +.SH "SEE ALSO" +.BR ibv_reg_mr (3), +.BR ibv_dereg_mr (3), +.SH "AUTHORS" +.TP +Matan Barak <matanb@xxxxxxxxxxxx> diff --git a/src/cmd.c b/src/cmd.c index 04477f7..e037aa5 100644 --- a/src/cmd.c +++ b/src/cmd.c @@ -267,6 +267,35 @@ int ibv_cmd_reg_mr(struct ibv_pd *pd, void *addr, size_t length, return 0; } +int ibv_cmd_rereg_mr(struct ibv_mr *mr, uint32_t flags, void *addr, + size_t length, uint64_t hca_va, int access, + struct ibv_pd *pd, struct ibv_rereg_mr *cmd, + size_t cmd_sz, struct ibv_rereg_mr_resp *resp, + size_t resp_sz) +{ + IBV_INIT_CMD_RESP(cmd, cmd_sz, REREG_MR, resp, resp_sz); + + cmd->mr_handle = mr->handle; + cmd->flags = flags; + cmd->start = (uintptr_t) addr; + cmd->length = length; + cmd->hca_va = hca_va; + cmd->pd_handle = (flags & IBV_REREG_MR_CHANGE_PD) ? pd->handle : 0; + cmd->access_flags = access; + + if (write(mr->context->cmd_fd, cmd, cmd_sz) != cmd_sz) + return errno; + + (void) VALGRIND_MAKE_MEM_DEFINED(resp, resp_sz); + + mr->lkey = resp->lkey; + mr->rkey = resp->rkey; + if (flags & IBV_REREG_MR_CHANGE_PD) + mr->context = pd->context; + + return 0; +} + int ibv_cmd_dereg_mr(struct ibv_mr *mr) { struct ibv_dereg_mr cmd; diff --git a/src/libibverbs.map b/src/libibverbs.map index 30212f3..307f412 100644 --- a/src/libibverbs.map +++ b/src/libibverbs.map @@ -108,5 +108,6 @@ IBVERBS_1.1 { ibv_cmd_create_srq_ex; ibv_cmd_create_qp_ex; ibv_cmd_open_qp; + ibv_cmd_rereg_mr; } IBVERBS_1.0; diff --git a/src/verbs.c b/src/verbs.c index a6aae70..2e5abde 100644 --- a/src/verbs.c +++ b/src/verbs.c @@ -223,6 +223,61 @@ struct ibv_mr *__ibv_reg_mr(struct ibv_pd *pd, void *addr, } default_symver(__ibv_reg_mr, ibv_reg_mr); +int __ibv_rereg_mr(struct ibv_mr *mr, int flags, + struct ibv_pd *pd, void *addr, + size_t length, uint64_t access) +{ + int dofork_onfail = 0; + int err; + void *old_addr; + size_t old_len; + + if (flags & ~IBV_REREG_MR_FLAGS_SUPPORTED) + return errno = EINVAL; + + if ((flags & IBV_REREG_MR_CHANGE_TRANSLATION) && + (0 >= length)) + return errno = EINVAL; + + if (!(flags & IBV_REREG_MR_CHANGE_ACCESS)) + access = 0; + + if ((flags & IBV_REREG_MR_CHANGE_TRANSLATION) && + (addr == NULL)) + return errno = EINVAL; + + if (NULL == mr->context->ops.rereg_mr) + return errno = ENOSYS; + + /* If address will be allocated internally we should + call don't fork later after having addr. + */ + if (flags & IBV_REREG_MR_CHANGE_TRANSLATION) { + err = ibv_dontfork_range(addr, length); + if (err) + return err; + dofork_onfail = 1; + } + + old_addr = mr->addr; + old_len = mr->length; + err = mr->context->ops.rereg_mr(mr, flags, pd, addr, length, access); + if (!err) { + if (flags & IBV_REREG_MR_CHANGE_TRANSLATION) { + ibv_dofork_range(old_addr, old_len); + mr->addr = addr; + mr->length = length; + } + if (flags & IBV_REREG_MR_CHANGE_PD) + mr->pd = pd; + } else if (dofork_onfail) { + ibv_dofork_range(addr, length); + } + + return err; +} +default_symver(__ibv_rereg_mr, ibv_rereg_mr); + int __ibv_dereg_mr(struct ibv_mr *mr) { int ret; -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html