Today, the provider ensures safe multi-thread access to any CQ. However, if the user can guarantee that a CQ will be accessed from only one thread, the lock on the CQ is still unnecessarily taken in the critical data path. The current standard Verbs API for CQ-creation doesn't take any attributes or objects to allow the user to convey single-threaded information to the provider. Thus, the current standard API needs to be modified. This RFC patch modifies the current ibv_create_cq to take the recently introduced Thread Domain standard verbs object. An object assigned to a Thread Domain is guaranteed by the user to be accessed from only one thread. So, if the user assigns a CQ to a Thread Domain, the provider can use this information to disable locking that protects the CQ against concurrent accesses. Other Verbs objects, such as the QP, are assigned to a Thread Domain via the Parent Domain object, which is nothing but an extended Protection Domain. This patch does not use the same methodology since it would limit what is currently allowed in the design space of Verbs applications. Today, QPs belonging to different Protection/Parent Domains can be bound to the same CQ. The CQ is not associated to any PD. Extending ibv_create_cq to take in a Parent Domain will enforce the isolating purpose of the PD on the CQ and will not allow a QP from another PD to be bound to it. I have included the diffs only for libibverbs and the mlx5 provider as an example. The control of locking in mlx5 is based on top of the changes introduced in [1]. The summary of diffs is just to give an idea of what a complete incorporation would look like. It includes extending the create_cq functions of each provider to take in the Thread Domain, and adding a NULL argument to the current ibv_create_cq calls. [1] https://patchwork.kernel.org/patch/10367417/ Signed-off-by: Rohit Zambre <rzambre@xxxxxxx> --- ibacm/prov/acmp/src/acmp.c | 2 +- libibverbs/compat-1_0.c | 2 +- libibverbs/driver.h | 2 +- libibverbs/dummy_ops.c | 2 +- libibverbs/examples/rc_pingpong.c | 2 +- libibverbs/examples/srq_pingpong.c | 2 +- libibverbs/examples/uc_pingpong.c | 2 +- libibverbs/examples/ud_pingpong.c | 2 +- libibverbs/examples/xsrq_pingpong.c | 4 ++-- libibverbs/verbs.c | 7 ++++--- libibverbs/verbs.h | 6 ++++-- librdmacm/cma.c | 4 ++-- librdmacm/examples/cmatose.c | 4 ++-- librdmacm/examples/mckey.c | 2 +- librdmacm/examples/rping.c | 2 +- librdmacm/examples/udaddy.c | 2 +- librdmacm/rsocket.c | 2 +- providers/bnxt_re/verbs.c | 3 ++- providers/bnxt_re/verbs.h | 2 +- providers/cxgb3/iwch.h | 2 +- providers/cxgb3/verbs.c | 3 ++- providers/cxgb4/libcxgb4.h | 2 +- providers/cxgb4/verbs.c | 3 ++- providers/hfi1verbs/hfiverbs.h | 4 ++-- providers/hfi1verbs/verbs.c | 4 ++-- providers/hns/hns_roce_u.h | 2 +- providers/hns/hns_roce_u_verbs.c | 2 +- providers/i40iw/i40iw_umain.h | 2 +- providers/i40iw/i40iw_uverbs.c | 4 +++- providers/ipathverbs/ipathverbs.h | 4 ++-- providers/ipathverbs/verbs.c | 4 ++-- providers/mlx4/mlx4.h | 2 +- providers/mlx4/verbs.c | 2 +- providers/mlx5/mlx5.h | 2 +- providers/mlx5/verbs.c | 9 ++++++--- providers/mthca/mthca.h | 2 +- providers/mthca/verbs.c | 2 +- providers/nes/nes_umain.h | 2 +- providers/nes/nes_uverbs.c | 2 +- providers/ocrdma/ocrdma_main.h | 2 +- providers/ocrdma/ocrdma_verbs.c | 2 +- providers/qedr/qelr_main.h | 2 +- providers/qedr/qelr_verbs.c | 2 +- providers/qedr/qelr_verbs.h | 2 +- providers/rxe/rxe.c | 2 +- providers/vmw_pvrdma/cq.c | 2 +- providers/vmw_pvrdma/pvrdma.h | 2 +- srp_daemon/srp_handle_traps.c | 4 ++-- 48 files changed, 72 insertions(+), 61 deletions(-) diff --git a/libibverbs/compat-1_0.c b/libibverbs/compat-1_0.c index 695f89d..a00d4f5 100644 --- a/libibverbs/compat-1_0.c +++ b/libibverbs/compat-1_0.c @@ -709,7 +709,7 @@ COMPAT_SYMVER_FUNC(ibv_create_cq, 1_0, "IBVERBS_1.0", return NULL; real_cq = ibv_create_cq(context->real_context, cqe, cq_context, - channel, comp_vector); + channel, comp_vector, NULL); if (!real_cq) { free(cq); return NULL; diff --git a/libibverbs/driver.h b/libibverbs/driver.h index ca61827..7b3e5f7 100644 --- a/libibverbs/driver.h +++ b/libibverbs/driver.h @@ -216,7 +216,7 @@ struct verbs_context_ops { struct ibv_ah_attr *attr); struct ibv_cq *(*create_cq)(struct ibv_context *context, int cqe, struct ibv_comp_channel *channel, - int comp_vector); + int comp_vector, struct ibv_td *td); struct ibv_cq_ex *(*create_cq_ex)( struct ibv_context *context, struct ibv_cq_init_attr_ex *init_attr); diff --git a/libibverbs/dummy_ops.c b/libibverbs/dummy_ops.c index 2b8b18f..fd87cc2 100644 --- a/libibverbs/dummy_ops.c +++ b/libibverbs/dummy_ops.c @@ -100,7 +100,7 @@ static struct ibv_ah *create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr) static struct ibv_cq *create_cq(struct ibv_context *context, int cqe, struct ibv_comp_channel *channel, - int comp_vector) + int comp_vector, struct ibv_td *td) { errno = ENOSYS; return NULL; diff --git a/libibverbs/verbs.c b/libibverbs/verbs.c index 3b5c310..b6a8690 100644 --- a/libibverbs/verbs.c +++ b/libibverbs/verbs.c @@ -365,12 +365,13 @@ out: LATEST_SYMVER_FUNC(ibv_create_cq, 1_1, "IBVERBS_1.1", struct ibv_cq *, - struct ibv_context *context, int cqe, void *cq_context, - struct ibv_comp_channel *channel, int comp_vector) + struct ibv_context *context, int cqe, + void *cq_context, struct ibv_comp_channel *channel, + int comp_vector, struct ibv_td *td) { struct ibv_cq *cq; - cq = context->ops.create_cq(context, cqe, channel, comp_vector); + cq = context->ops.create_cq(context, cqe, channel, comp_vector, td); if (cq) verbs_init_cq(cq, context, channel, cq_context); diff --git a/libibverbs/verbs.h b/libibverbs/verbs.h index eb57824..72d68ac 100644 --- a/libibverbs/verbs.h +++ b/libibverbs/verbs.h @@ -1632,7 +1632,7 @@ struct ibv_context_ops { int (*dealloc_mw)(struct ibv_mw *mw); struct ibv_cq * (*create_cq)(struct ibv_context *context, int cqe, struct ibv_comp_channel *channel, - int comp_vector); + int comp_vector, struct ibv_td *td); int (*poll_cq)(struct ibv_cq *cq, int num_entries, struct ibv_wc *wc); int (*req_notify_cq)(struct ibv_cq *cq, int solicited_only); void (*cq_event)(struct ibv_cq *cq); @@ -2173,11 +2173,13 @@ struct ibv_mr *ibv_reg_dm_mr(struct ibv_pd *pd, struct ibv_dm *dm, * May be NULL if completion events will not be used. * @comp_vector - Completion vector used to signal completion events. * Must be >= 0 and < context->num_comp_vectors. + * @td - Thread Domain that CQ will be part of */ struct ibv_cq *ibv_create_cq(struct ibv_context *context, int cqe, void *cq_context, struct ibv_comp_channel *channel, - int comp_vector); + int comp_vector, + struct ibv_td *td); /** * ibv_create_cq_ex - Create a completion queue diff --git a/providers/mlx5/mlx5.h b/providers/mlx5/mlx5.h index 019a97c..be70d63 100644 --- a/providers/mlx5/mlx5.h +++ b/providers/mlx5/mlx5.h @@ -717,7 +717,7 @@ int mlx5_bind_mw(struct ibv_qp *qp, struct ibv_mw *mw, struct ibv_cq *mlx5_create_cq(struct ibv_context *context, int cqe, struct ibv_comp_channel *channel, - int comp_vector); + int comp_vector, struct ibv_td *td); struct ibv_cq_ex *mlx5_create_cq_ex(struct ibv_context *context, struct ibv_cq_init_attr_ex *cq_attr); int mlx5_cq_fill_pfns(struct mlx5_cq *cq, diff --git a/providers/mlx5/verbs.c b/providers/mlx5/verbs.c index 3e7ce92..be8c1c7 100644 --- a/providers/mlx5/verbs.c +++ b/providers/mlx5/verbs.c @@ -598,6 +598,7 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *context, int ret; int ncqe; int rc; + int thread_safe; struct mlx5_context *mctx = to_mctx(context); FILE *fp = to_mctx(context)->dbg_fp; @@ -645,7 +646,8 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *context, memset(&cmd, 0, sizeof cmd); cq->cons_index = 0; - if (mlx5_spinlock_init(&cq->lock, !mlx5_single_threaded)) + thread_safe = mlx5_single_threaded || (cq_attr->flags & IBV_CREATE_CQ_ATTR_SINGLE_THREADED); + if (mlx5_spinlock_init(&cq->lock, !thread_safe)) goto err; ncqe = align_queue_size(cq_attr->cqe + 1); @@ -767,12 +769,13 @@ err: struct ibv_cq *mlx5_create_cq(struct ibv_context *context, int cqe, struct ibv_comp_channel *channel, - int comp_vector) + int comp_vector, struct ibv_td *td) { struct ibv_cq_ex *cq; struct ibv_cq_init_attr_ex cq_attr = {.cqe = cqe, .channel = channel, .comp_vector = comp_vector, - .wc_flags = IBV_WC_STANDARD_FLAGS}; + .wc_flags = IBV_WC_STANDARD_FLAGS, + .flags = (td) ? IBV_CREATE_CQ_ATTR_SINGLE_THREADED : 0}; if (cqe <= 0) { errno = EINVAL; -- 1.8.3.1 -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html