In order to query the current HCA's core clock, libmlx5 should support ibv_query_values verb. Querying the hardware's cycles register is done by mmaping this register to user-space. Therefore, when libmlx5 initializes we mmap the cycles register. This assumes the machine's architecture places the PCI and memory in the same address space. The page offset is passed through init_context vendor's data. Signed-off-by: Matan Barak <matanb@xxxxxxxxxxxx> --- src/mlx5-abi.h | 10 +++++++++- src/mlx5.c | 37 +++++++++++++++++++++++++++++++++++++ src/mlx5.h | 10 +++++++++- src/verbs.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 101 insertions(+), 2 deletions(-) diff --git a/src/mlx5-abi.h b/src/mlx5-abi.h index 769ea81..43d4906 100644 --- a/src/mlx5-abi.h +++ b/src/mlx5-abi.h @@ -55,7 +55,11 @@ struct mlx5_alloc_ucontext { __u32 total_num_uuars; __u32 num_low_latency_uuars; __u32 flags; - __u32 reserved; + __u32 comp_mask; +}; + +enum mlx5_ib_alloc_ucontext_resp_mask { + MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET = 1UL << 0, }; struct mlx5_alloc_ucontext_resp { @@ -72,6 +76,10 @@ struct mlx5_alloc_ucontext_resp { __u16 num_ports; __u8 cqe_version; __u8 reserved; + __u32 comp_mask; + __u32 response_length; + __u32 reserved2; + __u64 hca_core_clock_offset; }; struct mlx5_alloc_pd_resp { diff --git a/src/mlx5.c b/src/mlx5.c index 229d99d..c455c08 100644 --- a/src/mlx5.c +++ b/src/mlx5.c @@ -524,6 +524,30 @@ static int single_threaded_app(void) return 0; } +static int mlx5_map_internal_clock(struct mlx5_device *mdev, + struct ibv_context *ibv_ctx) +{ + struct mlx5_context *context = to_mctx(ibv_ctx); + void *hca_clock_page; + off_t offset = 0; + + set_command(MLX5_MMAP_GET_CORE_CLOCK_CMD, &offset); + hca_clock_page = mmap(NULL, mdev->page_size, + PROT_READ, MAP_SHARED, ibv_ctx->cmd_fd, + mdev->page_size * offset); + + if (hca_clock_page == MAP_FAILED) { + fprintf(stderr, PFX + "Warning: Timestamp available,\n" + "but failed to mmap() hca core clock 
page.\n"); + return -1; + } + + context->hca_core_clock = hca_clock_page + + (context->core_clock.offset & (mdev->page_size - 1)); + return 0; +} + static int mlx5_init_context(struct verbs_device *vdev, struct ibv_context *ctx, int cmd_fd) { @@ -647,6 +671,15 @@ static int mlx5_init_context(struct verbs_device *vdev, context->bfs[j].uuarn = j; } + context->hca_core_clock = NULL; + if (resp.response_length + sizeof(resp.ibv_resp) >= + offsetof(struct mlx5_alloc_ucontext_resp, hca_core_clock_offset) + + sizeof(resp.hca_core_clock_offset) && + resp.comp_mask & MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET) { + context->core_clock.offset = resp.hca_core_clock_offset; + mlx5_map_internal_clock(mdev, ctx); + } + mlx5_spinlock_init(&context->lock32); context->prefer_bf = get_always_bf(); @@ -664,6 +697,7 @@ static int mlx5_init_context(struct verbs_device *vdev, verbs_set_ctx_op(v_ctx, create_srq_ex, mlx5_create_srq_ex); verbs_set_ctx_op(v_ctx, get_srq_num, mlx5_get_srq_num); verbs_set_ctx_op(v_ctx, query_device_ex, mlx5_query_device_ex); + verbs_set_ctx_op(v_ctx, query_values, mlx5_query_values); verbs_set_ctx_op(v_ctx, create_cq_ex, mlx5_create_cq_ex); if (context->cqe_version && context->cqe_version == 1) verbs_set_ctx_op(v_ctx, poll_cq_ex, mlx5_poll_cq_v1_ex); @@ -697,6 +731,9 @@ static void mlx5_cleanup_context(struct verbs_device *device, if (context->uar[i]) munmap(context->uar[i], page_size); } + if (context->hca_core_clock) + munmap(context->hca_core_clock - context->core_clock.offset, + page_size); close_debug_file(context); } diff --git a/src/mlx5.h b/src/mlx5.h index 0c0b027..b5bcfaa 100644 --- a/src/mlx5.h +++ b/src/mlx5.h @@ -117,7 +117,8 @@ enum { enum { MLX5_MMAP_GET_REGULAR_PAGES_CMD = 0, - MLX5_MMAP_GET_CONTIGUOUS_PAGES_CMD = 1 + MLX5_MMAP_GET_CONTIGUOUS_PAGES_CMD = 1, + MLX5_MMAP_GET_CORE_CLOCK_CMD = 5 }; #define MLX5_CQ_PREFIX "MLX_CQ" @@ -307,6 +308,11 @@ struct mlx5_context { struct mlx5_spinlock hugetlb_lock; struct list_head hugetlb_list; 
uint8_t cqe_version; + struct { + uint64_t offset; + uint64_t mask; + } core_clock; + void *hca_core_clock; }; struct mlx5_bitmap { @@ -585,6 +591,8 @@ int mlx5_query_device_ex(struct ibv_context *context, const struct ibv_query_device_ex_input *input, struct ibv_device_attr_ex *attr, size_t attr_size); +int mlx5_query_values(struct ibv_context *context, + struct ibv_values_ex *values); struct ibv_qp *mlx5_create_qp_ex(struct ibv_context *context, struct ibv_qp_init_attr_ex *attr); int mlx5_query_port(struct ibv_context *context, uint8_t port, diff --git a/src/verbs.c b/src/verbs.c index 1dbee60..5d732a2 100644 --- a/src/verbs.c +++ b/src/verbs.c @@ -79,6 +79,52 @@ int mlx5_query_device(struct ibv_context *context, struct ibv_device_attr *attr) return 0; } +#define READL(ptr) (*((uint32_t *)(ptr))) +static int mlx5_read_clock(struct ibv_context *context, uint64_t *cycles) +{ + unsigned int clockhi, clocklo, clockhi1; + int i; + struct mlx5_context *ctx = to_mctx(context); + + if (!ctx->hca_core_clock) + return -EOPNOTSUPP; + + /* Handle wraparound */ + for (i = 0; i < 2; i++) { + clockhi = ntohl(READL(ctx->hca_core_clock)); + clocklo = ntohl(READL(ctx->hca_core_clock + 4)); + clockhi1 = ntohl(READL(ctx->hca_core_clock)); + if (clockhi == clockhi1) + break; + } + + *cycles = (uint64_t)clockhi << 32 | (uint64_t)clocklo; + + return 0; +} + +int mlx5_query_values(struct ibv_context *context, + struct ibv_values_ex *values) +{ + uint32_t comp_mask = 0; + int err = 0; + + if (values->comp_mask & IBV_VALUES_MASK_RAW_CLOCK) { + uint64_t cycles; + + err = mlx5_read_clock(context, &cycles); + if (!err) { + values->raw_clock.tv_sec = 0; + values->raw_clock.tv_nsec = cycles; + comp_mask |= IBV_VALUES_MASK_RAW_CLOCK; + } + } + + values->comp_mask = comp_mask; + + return err; +} + int mlx5_query_port(struct ibv_context *context, uint8_t port, struct ibv_port_attr *attr) { -- 2.1.0 -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message 
to majordomo@xxxxxxxxxxxxxxx. More majordomo info at http://vger.kernel.org/majordomo-info.html