From: Moni Shoua <monis@xxxxxxxxxxxx> Introduce a new verb named ibv_advise_mr(), it includes: - The application interface. - The command interface with the kernel. A detailed man page describes the verb's purpose and its usage. Signed-off-by: Moni Shoua <monis@xxxxxxxxxxxx> Signed-off-by: Aviad Yehezkel <aviadye@xxxxxxxxxxxx> Signed-off-by: Yishai Hadas <yishaih@xxxxxxxxxxxx> --- libibverbs/CMakeLists.txt | 1 + libibverbs/cmd_mr.c | 57 +++++++++++++++++ libibverbs/driver.h | 10 +++ libibverbs/dummy_ops.c | 11 ++++ libibverbs/libibverbs.map.in | 1 + libibverbs/man/CMakeLists.txt | 1 + libibverbs/man/ibv_advise_mr.3.md | 125 ++++++++++++++++++++++++++++++++++++++ libibverbs/verbs.h | 28 +++++++++ libibverbs/verbs_api.h | 6 ++ 9 files changed, 240 insertions(+) create mode 100644 libibverbs/cmd_mr.c create mode 100644 libibverbs/man/ibv_advise_mr.3.md diff --git a/libibverbs/CMakeLists.txt b/libibverbs/CMakeLists.txt index ddf5995..2ddafd6 100644 --- a/libibverbs/CMakeLists.txt +++ b/libibverbs/CMakeLists.txt @@ -36,6 +36,7 @@ rdma_library(ibverbs "${CMAKE_CURRENT_BINARY_DIR}/libibverbs.map" cmd_fallback.c cmd_flow_action.c cmd_ioctl.c + cmd_mr.c compat-1_0.c device.c dummy_ops.c diff --git a/libibverbs/cmd_mr.c b/libibverbs/cmd_mr.c new file mode 100644 index 0000000..c6a9eb0 --- /dev/null +++ b/libibverbs/cmd_mr.c @@ -0,0 +1,57 @@ + +/* + * Copyright (c) 2018 Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <infiniband/cmd_ioctl.h> +#include <rdma/ib_user_ioctl_cmds.h> +#include <infiniband/driver.h> +#include <infiniband/cmd_write.h> + +int ibv_cmd_advise_mr(struct ibv_pd *pd, + enum ibv_advise_mr_advice advice, + uint32_t flags, + struct ibv_sge *sg_list, + uint32_t num_sge) +{ + DECLARE_COMMAND_BUFFER(cmd, UVERBS_OBJECT_MR, + UVERBS_METHOD_ADVISE_MR, + 4); + + fill_attr_in_obj(cmd, UVERBS_ATTR_ADVISE_MR_PD_HANDLE, pd->handle); + fill_attr_const_in(cmd, UVERBS_ATTR_ADVISE_MR_ADVICE, advice); + fill_attr_in_uint32(cmd, UVERBS_ATTR_ADVISE_MR_FLAGS, flags); + fill_attr_in_ptr_array(cmd, UVERBS_ATTR_ADVISE_MR_SGE_LIST, + sg_list, num_sge); + + return execute_ioctl(pd->context, cmd); + +} diff --git a/libibverbs/driver.h b/libibverbs/driver.h index adf46c3..fb562d4 100644 --- a/libibverbs/driver.h +++ b/libibverbs/driver.h @@ -218,6 +218,11 @@ struct verbs_counters { * Keep sorted. */ struct verbs_context_ops { + int (*advise_mr)(struct ibv_pd *pd, + enum ibv_advise_mr_advice advice, + uint32_t flags, + struct ibv_sge *sg_list, + uint32_t num_sges); struct ibv_dm *(*alloc_dm)(struct ibv_context *context, struct ibv_alloc_dm_attr *attr); struct ibv_mw *(*alloc_mw)(struct ibv_pd *pd, enum ibv_mw_type type); @@ -442,6 +447,11 @@ int ibv_cmd_rereg_mr(struct verbs_mr *vmr, uint32_t flags, void *addr, size_t cmd_sz, struct ib_uverbs_rereg_mr_resp *resp, size_t resp_sz); int ibv_cmd_dereg_mr(struct verbs_mr *vmr); +int ibv_cmd_advise_mr(struct ibv_pd *pd, + enum ibv_advise_mr_advice advice, + uint32_t flags, + struct ibv_sge *sg_list, + uint32_t num_sge); int ibv_cmd_alloc_mw(struct ibv_pd *pd, enum ibv_mw_type type, struct ibv_mw *mw, struct ibv_alloc_mw *cmd, size_t cmd_size, diff --git a/libibverbs/dummy_ops.c b/libibverbs/dummy_ops.c index 43f8430..a5e9380 100644 --- a/libibverbs/dummy_ops.c +++ b/libibverbs/dummy_ops.c @@ -33,6 +33,15 @@ #include "ibverbs.h" #include <errno.h> +static int advise_mr(struct ibv_pd *pd, + enum ibv_advise_mr_advice advice, + 
uint32_t flags, + struct ibv_sge *sg_list, + uint32_t num_sges) +{ + return ENOSYS; +} + static struct ibv_dm *alloc_dm(struct ibv_context *context, struct ibv_alloc_dm_attr *attr) { @@ -436,6 +445,7 @@ static int resize_cq(struct ibv_cq *cq, int cqe) * Keep sorted. */ const struct verbs_context_ops verbs_dummy_ops = { + advise_mr, alloc_dm, alloc_mw, alloc_null_mr, @@ -550,6 +560,7 @@ void verbs_set_ops(struct verbs_context *vctx, } \ } while (0) + SET_OP(vctx, advise_mr); SET_OP(vctx, alloc_dm); SET_OP(ctx, alloc_mw); SET_OP(vctx, alloc_null_mr); diff --git a/libibverbs/libibverbs.map.in b/libibverbs/libibverbs.map.in index c489c76..4bffb1b 100644 --- a/libibverbs/libibverbs.map.in +++ b/libibverbs/libibverbs.map.in @@ -120,6 +120,7 @@ IBVERBS_PRIVATE_@IBVERBS_PABI_VERSION@ { __ioctl_final_num_attrs; _verbs_init_and_alloc_context; execute_ioctl; + ibv_cmd_advise_mr; ibv_cmd_alloc_dm; ibv_cmd_alloc_mw; ibv_cmd_alloc_pd; diff --git a/libibverbs/man/CMakeLists.txt b/libibverbs/man/CMakeLists.txt index 6efd671..4d5abef 100644 --- a/libibverbs/man/CMakeLists.txt +++ b/libibverbs/man/CMakeLists.txt @@ -1,4 +1,5 @@ rdma_man_pages( + ibv_advise_mr.3.md ibv_alloc_dm.3 ibv_alloc_mw.3 ibv_alloc_null_mr.3.md diff --git a/libibverbs/man/ibv_advise_mr.3.md b/libibverbs/man/ibv_advise_mr.3.md new file mode 100644 index 0000000..ce7747f --- /dev/null +++ b/libibverbs/man/ibv_advise_mr.3.md @@ -0,0 +1,125 @@ +--- +date: 2018-10-19 +footer: libibverbs +header: "Libibverbs Programmer's Manual" +layout: page +license: 'Licensed under the OpenIB.org BSD license (FreeBSD Variant) - See COPYING.md' +section: 3 +title: IBV_ADVISE_MR +--- + +# NAME + +ibv_advise_mr - Gives advice or directions to the kernel about an + address range belonging to a memory region (MR). 
+ +# SYNOPSIS + +```c +#include <infiniband/verbs.h> + +int ibv_advise_mr(struct ibv_pd *pd, + enum ibv_advise_mr_advice advice, + uint32_t flags, + struct ibv_sge *sg_list, + uint32_t num_sge) +``` + +# DESCRIPTION + +**ibv_advise_mr()** Gives advice or directions to the kernel about an +address range belonging to a memory region (MR). +Applications that are aware of future access patterns can use this verb +in order to leverage this knowledge to improve system or +application performance. + +**Conventional advice values** +*IBV_ADVISE_MR_ADVICE_PREFETCH* +: Pre-fetch a range of an on-demand paging MR. + Make pages present before the actual IO is conducted. + This would provide a way to reduce latency by overlapping paging-in + and either compute time or IO to other ranges. + +# ARGUMENTS +*pd* +: The protection domain (PD) associated with the MR. + +*advice* +: The requested advice value (as listed above). + +*flags* +: Describes the properties of the advise operation + **Conventional flag values** + *IBV_ADVISE_MR_FLAG_SYNC* + : Request to be a synchronized operation + *IBV_ADVISE_MR_FLAG_WRITE_ACCESS* + : When using IBV_ADVISE_MR_ADVICE_PREFETCH advice value, one should + specify this flag to allow pre-fetching with a future write + access (The MR must allow write access). + The default pre-fetching behavior is read-only access. + +*sg_list* +: Pointer to the s/g array + When using IBV_ADVISE_MR_ADVICE_PREFETCH advice value, all the lkeys of all + the scatter gather elements (SGEs) must be associated with ODP MRs + (MRs that were registered with IBV_ACCESS_ON_DEMAND). + +*num_sge* +: Number of elements in the s/g array + +# RETURN VALUE + +**ibv_advise_mr()** returns 0 when the call was successful, or the value + of errno on failure (which indicates the failure reason). + +*ENOSYS* +: libibverbs or provider driver doesn't support the ibv_advise_mr() verb. + +*ENOTSUP* +: The advise operation isn't supported. 
+ +*EFAULT* +: In one of the following: + o When the range requested is out of the MR bounds, or when parts of + it are not part of the process address space. + o One of the lkeys provided in the scatter gather list is invalid or + with wrong write access. + +*EINVAL* +: In one of the following: + o The PD is invalid. + o The flags are invalid. + +# NOTES + +An application may pre-fetch any address range within an ODP MR when using the +IBV_ADVISE_MR_ADVICE_PREFETCH advice. +Semantically, this operation is best-effort. That means the kernel does not +guarantee that underlying pages are updated in the HCA or that the pre-fetched pages +will remain resident. + +When using IBV_ADVISE_MR_ADVICE_PREFETCH advice, the operation will be done in +the following stages: + o Page in the user pages to memory (pages aren't pinned). + o Get the dma mapping of these user pages. + o Post the underlying page translations to the HCA. + +If **IBV_ADVISE_MR_FLAG_SYNC** is specified then the underlying pages are +guaranteed to be updated in the HCA before returning SUCCESS. +Otherwise the driver can choose to postpone the posting of the new translations +to the HCA. +When performing a local RDMA access operation it is recommended to use +IBV_ADVISE_MR_FLAG_SYNC flag with IBV_ADVISE_MR_ADVICE_PREFETCH advice to +increase the probability that the page translations are valid in the HCA +and avoid future page faults. 
+ +# SEE ALSO + +**ibv_reg_mr**(3), +**ibv_rereg_mr**(3), +**ibv_dereg_mr**(3) + +# AUTHOR + +Aviad Yehezkel <aviadye@xxxxxxxxxxxx> + diff --git a/libibverbs/verbs.h b/libibverbs/verbs.h index 93e0430..4107703 100644 --- a/libibverbs/verbs.h +++ b/libibverbs/verbs.h @@ -1786,6 +1786,11 @@ struct ibv_values_ex { struct verbs_context { /* "grows up" - new fields go here */ + int (*advise_mr)(struct ibv_pd *pd, + enum ibv_advise_mr_advice advice, + uint32_t flags, + struct ibv_sge *sg_list, + uint32_t num_sges); struct ibv_mr *(*alloc_null_mr)(struct ibv_pd *pd); int (*read_counters)(struct ibv_counters *counters, uint64_t *counters_value, @@ -2210,6 +2215,29 @@ struct ibv_comp_channel *ibv_create_comp_channel(struct ibv_context *context); int ibv_destroy_comp_channel(struct ibv_comp_channel *channel); /** + * ibv_advise_mr - Gives advice about an address range in MRs + * @pd - protection domain of all MRs for which the advice is for + * @advice - type of advice + * @flags - advice modifiers + * @sg_list - an array of memory ranges + * @num_sge - number of elements in the array + */ +static inline int ibv_advise_mr(struct ibv_pd *pd, + enum ibv_advise_mr_advice advice, + uint32_t flags, + struct ibv_sge *sg_list, + uint32_t num_sge) +{ + struct verbs_context *vctx; + + vctx = verbs_get_ctx_op(pd->context, advise_mr); + if (!vctx) + return ENOSYS; + + return vctx->advise_mr(pd, advice, flags, sg_list, num_sge); +} + +/** * ibv_alloc_dm - Allocate device memory * @context - Context DM will be attached to * @attr - Attributes to allocate the DM with diff --git a/libibverbs/verbs_api.h b/libibverbs/verbs_api.h index 4ac1335..743b305 100644 --- a/libibverbs/verbs_api.h +++ b/libibverbs/verbs_api.h @@ -85,6 +85,12 @@ #define ibv_flow_action_esp_encap ib_uverbs_flow_action_esp_encap #define ibv_flow_action_esp ib_uverbs_flow_action_esp +#define ibv_advise_mr_advice ib_uverbs_advise_mr_advice +#define IBV_ADVISE_MR_ADVICE_PREFETCH IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH + 
+#define IBV_ADVISE_MR_FLAG_SYNC IB_UVERBS_ADVISE_MR_FLAG_SYNC +#define IBV_ADVISE_MR_FLAG_WRITE_ACCESS IB_UVERBS_ADVISE_MR_FLAG_WRITE_ACCESS + #define IBV_QPF_GRH_REQUIRED IB_UVERBS_QPF_GRH_REQUIRED #endif -- 1.8.3.1