An independent communication path is one that shares no hardware resources with other communication paths. From a Verbs perspective, an independent path is the one obtained by the first QP in a context. Subsequent QPs in the context may or may not share hardware resources among themselves; the mapping of resources to QPs is provider-specific. Sharing resources between paths can hurt throughput in certain cases. When only one thread uses an independent path, we term it an uncontended independent path.

Today, the user has no way to request an independent path for an arbitrary QP within a context. To create multiple independent paths, the Verbs user must create multiple contexts with one QP per context. However, this translates to significant hardware-resource wastage: 89% in the case of the ConnectX-4 mlx5 device.

This RFC patch allows the user to request uncontended independent communication paths in Verbs through an "independent" flag during Thread Domain (TD) creation. It also provides a first-draft implementation of uncontended independent paths in the mlx5 provider. In mlx5, every even-odd pair of TDs shares the same UAR page, which is not the case when the user creates multiple contexts with one TD per context. When the user requests an independent TD, the driver dynamically allocates a new UAR page and maps bfreg_0 of that UAR to the TD. bfreg_1 of a UAR belonging to an independent TD is never used and is essentially wasted. Hence, since hardware resources are limited, there must be a maximum number of independent paths allowed within a context: half the maximum number of dynamic UARs allowed per context. A usage sketch follows the diff below.

Signed-off-by: Rohit Zambre <rzambre@xxxxxxx>
---
 libibverbs/verbs.h     |  1 +
 providers/mlx5/mlx5.c  |  3 +++
 providers/mlx5/mlx5.h  |  2 ++
 providers/mlx5/verbs.c | 51 +++++++++++++++++++++++++++++++++++---------------
 4 files changed, 42 insertions(+), 15 deletions(-)

diff --git a/libibverbs/verbs.h b/libibverbs/verbs.h
index eb57824..b5fa56f 100644
--- a/libibverbs/verbs.h
+++ b/libibverbs/verbs.h
@@ -561,6 +561,7 @@ struct ibv_pd {
 };
 
 struct ibv_td_init_attr {
+	int		independent;
 	uint32_t	comp_mask;
 };
 
diff --git a/providers/mlx5/mlx5.c b/providers/mlx5/mlx5.c
index 3a3fc47..b8fa5ce 100644
--- a/providers/mlx5/mlx5.c
+++ b/providers/mlx5/mlx5.c
@@ -1056,6 +1056,9 @@ static struct verbs_context *mlx5_alloc_context(struct ibv_device *ibdev,
 	context->max_srq_recv_wr = resp.max_srq_recv_wr;
 	context->num_dyn_bfregs = resp.num_dyn_bfregs;
 
+	context->max_ind_dyn_paths = context->num_dyn_bfregs / MLX5_NUM_NON_FP_BFREGS_PER_UAR / 2;
+	context->count_ind_dyn_paths = 0;
+
 	if (context->num_dyn_bfregs) {
 		context->count_dyn_bfregs = calloc(context->num_dyn_bfregs,
 						   sizeof(*context->count_dyn_bfregs));
diff --git a/providers/mlx5/mlx5.h b/providers/mlx5/mlx5.h
index f0f376c..74bf10d 100644
--- a/providers/mlx5/mlx5.h
+++ b/providers/mlx5/mlx5.h
@@ -295,6 +295,8 @@ struct mlx5_context {
 	uint16_t		flow_action_flags;
 	uint64_t		max_dm_size;
 	uint32_t		eth_min_inline_size;
+	uint32_t		max_ind_dyn_paths;
+	uint32_t		count_ind_dyn_paths;
 };
 
 struct mlx5_bitmap {
diff --git a/providers/mlx5/verbs.c b/providers/mlx5/verbs.c
index 71728c8..b28ed9e 100644
--- a/providers/mlx5/verbs.c
+++ b/providers/mlx5/verbs.c
@@ -164,19 +164,32 @@ static void mlx5_put_bfreg_index(struct mlx5_context *ctx, uint32_t bfreg_dyn_in
 {
 	pthread_mutex_lock(&ctx->dyn_bfregs_mutex);
 	ctx->count_dyn_bfregs[bfreg_dyn_index]--;
+	if (bfreg_dyn_index < ctx->max_ind_dyn_paths * MLX5_NUM_NON_FP_BFREGS_PER_UAR)
+		ctx->count_ind_dyn_paths--;
 	pthread_mutex_unlock(&ctx->dyn_bfregs_mutex);
 }
 
-static int mlx5_get_bfreg_index(struct mlx5_context *ctx)
+static int mlx5_get_bfreg_index(struct mlx5_context *ctx, int independent)
 {
 	int i;
 
 	pthread_mutex_lock(&ctx->dyn_bfregs_mutex);
-	for (i = 0; i < ctx->num_dyn_bfregs; i++) {
-		if (!ctx->count_dyn_bfregs[i]) {
-			ctx->count_dyn_bfregs[i]++;
-			pthread_mutex_unlock(&ctx->dyn_bfregs_mutex);
-			return i;
+	if (independent) {
+		for (i = 0; i < ctx->max_ind_dyn_paths * MLX5_NUM_NON_FP_BFREGS_PER_UAR; i += MLX5_NUM_NON_FP_BFREGS_PER_UAR) {
+			if (!ctx->count_dyn_bfregs[i]) {
+				ctx->count_dyn_bfregs[i]++;
+				ctx->count_ind_dyn_paths++;
+				pthread_mutex_unlock(&ctx->dyn_bfregs_mutex);
+				return i;
+			}
+		}
+	} else {
+		for (i = ctx->max_ind_dyn_paths * MLX5_NUM_NON_FP_BFREGS_PER_UAR; i < ctx->num_dyn_bfregs; i++) {
+			if (!ctx->count_dyn_bfregs[i]) {
+				ctx->count_dyn_bfregs[i]++;
+				pthread_mutex_unlock(&ctx->dyn_bfregs_mutex);
+				return i;
+			}
 		}
 	}
 
@@ -186,7 +199,7 @@ static int mlx5_get_bfreg_index(struct mlx5_context *ctx)
 }
 
 /* Returns a dedicated BF to be used by a thread domain */
-static struct mlx5_bf *mlx5_attach_dedicated_bf(struct ibv_context *context)
+static struct mlx5_bf *mlx5_attach_dedicated_bf(struct ibv_context *context, int independent)
 {
 	struct mlx5_uar_info uar;
 	struct mlx5_context *ctx = to_mctx(context);
@@ -198,7 +211,7 @@ static struct mlx5_bf *mlx5_attach_dedicated_bf(struct ibv_context *context)
 	int mmap_bf_index;
 	int num_bfregs_per_page;
 
-	bfreg_dyn_index = mlx5_get_bfreg_index(ctx);
+	bfreg_dyn_index = mlx5_get_bfreg_index(ctx, independent);
 	if (bfreg_dyn_index < 0) {
 		errno = ENOENT;
 		return NULL;
@@ -212,13 +225,15 @@ static struct mlx5_bf *mlx5_attach_dedicated_bf(struct ibv_context *context)
 	num_bfregs_per_page = ctx->num_uars_per_page * MLX5_NUM_NON_FP_BFREGS_PER_UAR;
 	uar_page_index = bfreg_dyn_index / num_bfregs_per_page;
 
-	/* The first bf index of each page will hold the mapped area address of the UAR */
-	mmap_bf_index = ctx->start_dyn_bfregs_index + (uar_page_index * num_bfregs_per_page);
+	if (!independent) {
+		/* The first bf index of each page will hold the mapped area address of the UAR */
+		mmap_bf_index = ctx->start_dyn_bfregs_index + (uar_page_index * num_bfregs_per_page);
 
-	pthread_mutex_lock(&ctx->dyn_bfregs_mutex);
-	if (ctx->bfs[mmap_bf_index].uar) {
-		/* UAR was already mapped, set its matching bfreg */
-		goto set_reg;
+		pthread_mutex_lock(&ctx->dyn_bfregs_mutex);
+		if (ctx->bfs[mmap_bf_index].uar) {
+			/* UAR was already mapped, set its matching bfreg */
+			goto set_reg;
+		}
 	}
 
 	ctx->bfs[mmap_bf_index].uar = mlx5_mmap(&uar, uar_page_index, context->cmd_fd, dev->page_size,
@@ -261,19 +276,25 @@ static void mlx5_detach_dedicated_bf(struct ibv_context *context, struct mlx5_bf
 struct ibv_td *mlx5_alloc_td(struct ibv_context *context, struct ibv_td_init_attr *init_attr)
 {
 	struct mlx5_td	*td;
+	struct mlx5_context *mctx = to_mctx(context);
 
 	if (init_attr->comp_mask) {
 		errno = EINVAL;
 		return NULL;
 	}
 
+	if (init_attr->independent && (mctx->count_ind_dyn_paths >= mctx->max_ind_dyn_paths)) {
+		errno = EINVAL;
+		return NULL;
+	}
+
 	td = calloc(1, sizeof(*td));
 	if (!td) {
 		errno = ENOMEM;
 		return NULL;
 	}
 
-	td->bf = mlx5_attach_dedicated_bf(context);
+	td->bf = mlx5_attach_dedicated_bf(context, init_attr->independent);
 	if (!td->bf) {
 		free(td);
 		return NULL;
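For illustration, here is a minimal usage sketch assuming this patch is
applied. Only the .independent field is new; ibv_alloc_td(), ibv_alloc_pd(),
and ibv_alloc_parent_domain() are the existing Verbs API. The helper name
alloc_independent_pd() is hypothetical, and QP creation plus error/cleanup
paths are abbreviated:

	#include <stdio.h>
	#include <infiniband/verbs.h>

	/* Hypothetical helper: returns a parent domain whose QPs use an
	 * uncontended independent path, i.e. a dedicated UAR page. */
	static struct ibv_pd *alloc_independent_pd(struct ibv_context *ctx)
	{
		struct ibv_td_init_attr td_attr = {
			.independent = 1,	/* proposed flag from this patch */
			.comp_mask = 0,
		};
		struct ibv_parent_domain_init_attr pd_attr;
		struct ibv_td *td;
		struct ibv_pd *pd;

		td = ibv_alloc_td(ctx, &td_attr);
		if (!td) {
			/* fails with EINVAL once count_ind_dyn_paths has
			 * reached max_ind_dyn_paths for this context */
			fprintf(stderr, "no independent paths left\n");
			return NULL;
		}

		pd = ibv_alloc_pd(ctx);
		if (!pd)
			return NULL;

		pd_attr.pd = pd;
		pd_attr.td = td;
		pd_attr.comp_mask = 0;

		/* QPs created on the returned parent domain use the TD's
		 * private bfreg_0 and thus share no doorbell hardware with
		 * other QPs in this context. */
		return ibv_alloc_parent_domain(ctx, &pd_attr);
	}

For scale: the cap works out to max_ind_dyn_paths = num_dyn_bfregs /
MLX5_NUM_NON_FP_BFREGS_PER_UAR / 2, so with the 2 regular bfregs per UAR that
the current mlx5 provider defines, a context for which the kernel reports,
say, num_dyn_bfregs = 4096 would admit at most 4096 / 2 / 2 = 1024
independent TDs.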
-- 
1.8.3.1