[RFC PATCH] verbs: Introduce mlx5: Implement uncontended independent communication paths

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



An independent communication path is one that shares no hardware resources
with other communication paths. From a Verbs perspective, an independent
path is the one obtained by the first QP in a context. The next QPs of the
context may or may not share hardware resources amongst themselves; the
mapping of the resources to the QPs is provider-specific. Sharing resources
can hurt throughput in certain cases. When only one thread uses the
independent path, we term it an uncontended independent path.

Today, the user has no way to request an independent path for an
arbitrary QP within a context. To create multiple independent paths, the
Verbs user must create multiple contexts with 1 QP per context. However,
this translates to significant hardware-resource wastage: 89% in the case
of the ConnectX-4 mlx5 device.

This RFC patch allows the user to request uncontended independent
communication paths in Verbs through an "independent" flag during Thread
Domain (TD) creation. The patch also provides a first-draft implementation
of uncontended independent paths in the mlx5 provider.

In mlx5, every even-odd pair of TDs shares the same UAR page, which is not
the case when the user creates multiple contexts with one TD per context.
When the user requests an independent TD, the driver will dynamically
allocate a new UAR page and map bfreg_0 of that UAR to the TD. bfreg_1 of
the UAR belonging to an independent TD is never used and is essentially
wasted. Hence, there must be a maximum number of independent paths allowed
within a context since the hardware resources are limited. This would be
half of the maximum number of dynamic UARs allowed per context.

Signed-off-by: Rohit Zambre <rzambre@xxxxxxx>
---
 libibverbs/verbs.h     |  1 +
 providers/mlx5/mlx5.c  |  3 +++
 providers/mlx5/mlx5.h  |  2 ++
 providers/mlx5/verbs.c | 51 +++++++++++++++++++++++++++++++++++---------------
 4 files changed, 42 insertions(+), 15 deletions(-)

diff --git a/libibverbs/verbs.h b/libibverbs/verbs.h
index eb57824..b5fa56f 100644
--- a/libibverbs/verbs.h
+++ b/libibverbs/verbs.h
@@ -561,6 +561,7 @@ struct ibv_pd {
 };
 
 struct ibv_td_init_attr {
+	int independent;
 	uint32_t comp_mask;
 };
 
diff --git a/providers/mlx5/mlx5.c b/providers/mlx5/mlx5.c
index 3a3fc47..b8fa5ce 100644
--- a/providers/mlx5/mlx5.c
+++ b/providers/mlx5/mlx5.c
@@ -1056,6 +1056,9 @@ static struct verbs_context *mlx5_alloc_context(struct ibv_device *ibdev,
 	context->max_srq_recv_wr = resp.max_srq_recv_wr;
 	context->num_dyn_bfregs = resp.num_dyn_bfregs;
 
+	context->max_ind_dyn_paths = context->num_dyn_bfregs / MLX5_NUM_NON_FP_BFREGS_PER_UAR / 2;
+	context->count_ind_dyn_paths = 0;
+
 	if (context->num_dyn_bfregs) {
 		context->count_dyn_bfregs = calloc(context->num_dyn_bfregs,
 						   sizeof(*context->count_dyn_bfregs));
diff --git a/providers/mlx5/mlx5.h b/providers/mlx5/mlx5.h
index f0f376c..74bf10d 100644
--- a/providers/mlx5/mlx5.h
+++ b/providers/mlx5/mlx5.h
@@ -295,6 +295,8 @@ struct mlx5_context {
 	uint16_t			flow_action_flags;
 	uint64_t			max_dm_size;
 	uint32_t                        eth_min_inline_size;
+	uint32_t			max_ind_dyn_paths;
+	uint32_t			count_ind_dyn_paths;
 };
 
 struct mlx5_bitmap {
diff --git a/providers/mlx5/verbs.c b/providers/mlx5/verbs.c
index 71728c8..b28ed9e 100644
--- a/providers/mlx5/verbs.c
+++ b/providers/mlx5/verbs.c
@@ -164,19 +164,32 @@ static void mlx5_put_bfreg_index(struct mlx5_context *ctx, uint32_t bfreg_dyn_in
 {
 	pthread_mutex_lock(&ctx->dyn_bfregs_mutex);
 	ctx->count_dyn_bfregs[bfreg_dyn_index]--;
+	if (bfreg_dyn_index < ctx->max_ind_dyn_paths * MLX5_NUM_NON_FP_BFREGS_PER_UAR)
+		ctx->count_ind_dyn_paths--;
 	pthread_mutex_unlock(&ctx->dyn_bfregs_mutex);
 }
 
-static int mlx5_get_bfreg_index(struct mlx5_context *ctx)
+static int mlx5_get_bfreg_index(struct mlx5_context *ctx, int independent)
 {
 	int i;
 
 	pthread_mutex_lock(&ctx->dyn_bfregs_mutex);
-	for (i = 0; i < ctx->num_dyn_bfregs; i++) {
-		if (!ctx->count_dyn_bfregs[i]) {
-			ctx->count_dyn_bfregs[i]++;
-			pthread_mutex_unlock(&ctx->dyn_bfregs_mutex);
-			return i;
+	if (independent) {
+		for (i = 0; i < ctx->max_ind_dyn_paths * MLX5_NUM_NON_FP_BFREGS_PER_UAR; i+=MLX5_NUM_NON_FP_BFREGS_PER_UAR) {
+			if (!ctx->count_dyn_bfregs[i]) {
+				ctx->count_dyn_bfregs[i]++;
+				ctx->count_ind_dyn_paths++;
+				pthread_mutex_unlock(&ctx->dyn_bfregs_mutex);
+				return i;
+			}
+		}
+	} else {
+		for (i = ctx->max_ind_dyn_paths * MLX5_NUM_NON_FP_BFREGS_PER_UAR; i < ctx->num_dyn_bfregs; i++) {
+			if (!ctx->count_dyn_bfregs[i]) {
+				ctx->count_dyn_bfregs[i]++;
+				pthread_mutex_unlock(&ctx->dyn_bfregs_mutex);
+				return i;
+			}
 		}
 	}
 
@@ -186,7 +199,7 @@ static int mlx5_get_bfreg_index(struct mlx5_context *ctx)
 }
 
 /* Returns a dedicated BF to be used by a thread domain */
-static struct mlx5_bf *mlx5_attach_dedicated_bf(struct ibv_context *context)
+static struct mlx5_bf *mlx5_attach_dedicated_bf(struct ibv_context *context, int independent)
 {
 	struct mlx5_uar_info uar;
 	struct mlx5_context *ctx = to_mctx(context);
@@ -198,7 +211,7 @@ static struct mlx5_bf *mlx5_attach_dedicated_bf(struct ibv_context *context)
 	int mmap_bf_index;
 	int num_bfregs_per_page;
 
-	bfreg_dyn_index = mlx5_get_bfreg_index(ctx);
+	bfreg_dyn_index = mlx5_get_bfreg_index(ctx, independent);
 	if (bfreg_dyn_index < 0) {
 		errno = ENOENT;
 		return NULL;
@@ -212,13 +225,15 @@ static struct mlx5_bf *mlx5_attach_dedicated_bf(struct ibv_context *context)
 	num_bfregs_per_page = ctx->num_uars_per_page * MLX5_NUM_NON_FP_BFREGS_PER_UAR;
 	uar_page_index = bfreg_dyn_index / num_bfregs_per_page;
 
-	/* The first bf index of each page will hold the mapped area address of the UAR */
-	mmap_bf_index = ctx->start_dyn_bfregs_index + (uar_page_index * num_bfregs_per_page);
+	if (!independent) {
+		/* The first bf index of each page will hold the mapped area address of the UAR */
+		mmap_bf_index = ctx->start_dyn_bfregs_index + (uar_page_index * num_bfregs_per_page);
 
-	pthread_mutex_lock(&ctx->dyn_bfregs_mutex);
-	if (ctx->bfs[mmap_bf_index].uar) {
-		/* UAR was already mapped, set its matching bfreg */
-		goto set_reg;
+		pthread_mutex_lock(&ctx->dyn_bfregs_mutex);
+		if (ctx->bfs[mmap_bf_index].uar) {
+			/* UAR was already mapped, set its matching bfreg */
+			goto set_reg;
+		}
 	}
 
 	ctx->bfs[mmap_bf_index].uar = mlx5_mmap(&uar, uar_page_index, context->cmd_fd, dev->page_size,
@@ -261,19 +276,25 @@ static void mlx5_detach_dedicated_bf(struct ibv_context *context, struct mlx5_bf
 struct ibv_td *mlx5_alloc_td(struct ibv_context *context, struct ibv_td_init_attr *init_attr)
 {
 	struct mlx5_td	*td;
+	struct mlx5_context *mctx = to_mctx(context);
 
 	if (init_attr->comp_mask) {
 		errno = EINVAL;
 		return NULL;
 	}
 
+	if (init_attr->independent && (mctx->count_ind_dyn_paths >= mctx->max_ind_dyn_paths)) {
+		errno = EINVAL;
+		return NULL;
+	}
+
 	td = calloc(1, sizeof(*td));
 	if (!td) {
 		errno = ENOMEM;
 		return NULL;
 	}
 
-	td->bf = mlx5_attach_dedicated_bf(context);
+	td->bf = mlx5_attach_dedicated_bf(context, init_attr->independent);
 	if (!td->bf) {
 		free(td);
 		return NULL;
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux