> From: Bernard Metzler <BMT@xxxxxxxxxxxxxx> > Sent: Friday, August 30, 2019 3:08 PM > > External Email > > ---------------------------------------------------------------------- > -----"Michal Kalderon" <michal.kalderon@xxxxxxxxxxx> wrote: ----- > > Hi Michael, > > I tried this patch. It unfortunately panics immediately when siw gets used. I'll > investigate further. Some comments in line. Thanks for testing, > > Thanks > Bernard. > > >To: <mkalderon@xxxxxxxxxxx>, <aelior@xxxxxxxxxxx>, <jgg@xxxxxxxx>, > ><dledford@xxxxxxxxxx>, <bmt@xxxxxxxxxxxxxx>, > <galpress@xxxxxxxxxx>, > ><sleybo@xxxxxxxxxx>, <leon@xxxxxxxxxx> > >From: "Michal Kalderon" <michal.kalderon@xxxxxxxxxxx> > >Date: 08/27/2019 03:31PM > >Cc: <linux-rdma@xxxxxxxxxxxxxxx>, "Michal Kalderon" > ><michal.kalderon@xxxxxxxxxxx>, "Ariel Elior" > ><ariel.elior@xxxxxxxxxxx> > >Subject: [EXTERNAL] [PATCH v8 rdma-next 4/7] RDMA/siw: Use the > common > >mmap_xa helpers > > > >Remove the functions related to managing the mmap_xa database. > >This code is now common in ib_core. Use the common API's instead. 
> > > >Signed-off-by: Ariel Elior <ariel.elior@xxxxxxxxxxx> > >Signed-off-by: Michal Kalderon <michal.kalderon@xxxxxxxxxxx> > >--- > > drivers/infiniband/sw/siw/siw.h | 20 ++- > > drivers/infiniband/sw/siw/siw_main.c | 1 + > > drivers/infiniband/sw/siw/siw_verbs.c | 223 > >+++++++++++++++++++--------------- > > drivers/infiniband/sw/siw/siw_verbs.h | 1 + > > 4 files changed, 144 insertions(+), 101 deletions(-) > > > >diff --git a/drivers/infiniband/sw/siw/siw.h > >b/drivers/infiniband/sw/siw/siw.h > >index 77b1aabf6ff3..d48cd42ae43e 100644 > >--- a/drivers/infiniband/sw/siw/siw.h > >+++ b/drivers/infiniband/sw/siw/siw.h > >@@ -220,7 +220,7 @@ struct siw_cq { > > u32 cq_get; > > u32 num_cqe; > > bool kernel_verbs; > >- u32 xa_cq_index; /* mmap information for CQE array */ > >+ u64 cq_key; /* mmap information for CQE array */ > > u32 id; /* For debugging only */ > > }; > > > >@@ -263,7 +263,7 @@ struct siw_srq { > > u32 rq_put; > > u32 rq_get; > > u32 num_rqe; /* max # of wqe's allowed */ > >- u32 xa_srq_index; /* mmap information for SRQ array */ > >+ u64 srq_key; /* mmap information for SRQ array */ > > char armed; /* inform user if limit hit */ > > char kernel_verbs; /* '1' if kernel client */ }; @@ -477,8 +477,8 @@ > >struct siw_qp { > > u8 layer : 4, etype : 4; > > u8 ecode; > > } term_info; > >- u32 xa_sq_index; /* mmap information for SQE array */ > >- u32 xa_rq_index; /* mmap information for RQE array */ > >+ u64 sq_key; /* mmap information for SQE array */ > >+ u64 rq_key; /* mmap information for RQE array */ > > struct rcu_head rcu; > > }; > > > >@@ -503,6 +503,12 @@ struct iwarp_msg_info { > > int (*rx_data)(struct siw_qp *qp); > > }; > > > >+struct siw_user_mmap_entry { > >+ struct rdma_user_mmap_entry rdma_entry; > >+ u64 address; > >+ u64 length; > > This unsigned 64 range for an mmap length seems to be quite ambitious. I do > not expect we are mmapping objects of that size. 
That range would be also > not supported by 32bit architectures - a user cannot mmap more than size_t. > So size_t might be more appropriate? This throughout all the proposed > mmap_xa helpers. I will take a look but size_t is 64 bits anyway on a 64 bit system. > > >+}; > >+ > > /* Global siw parameters. Currently set in siw_main.c */ extern const > >bool zcopy_tx; extern const bool try_gso; @@ -607,6 +613,12 @@ static > >inline struct siw_mr *to_siw_mr(struct ib_mr *base_mr) > > return container_of(base_mr, struct siw_mr, base_mr); } > > > >+static inline struct siw_user_mmap_entry * to_siw_mmap_entry(struct > >+rdma_user_mmap_entry *rdma_mmap) { > >+ return container_of(rdma_mmap, struct siw_user_mmap_entry, > >rdma_entry); > >+} > >+ > > static inline struct siw_qp *siw_qp_id2obj(struct siw_device *sdev, > >int id) { > > struct siw_qp *qp; > >diff --git a/drivers/infiniband/sw/siw/siw_main.c > >b/drivers/infiniband/sw/siw/siw_main.c > >index 05a92f997f60..46c0bb59489d 100644 > >--- a/drivers/infiniband/sw/siw/siw_main.c > >+++ b/drivers/infiniband/sw/siw/siw_main.c > >@@ -298,6 +298,7 @@ static const struct ib_device_ops siw_device_ops = > >{ > > .iw_rem_ref = siw_qp_put_ref, > > .map_mr_sg = siw_map_mr_sg, > > .mmap = siw_mmap, > >+ .mmap_free = siw_mmap_free, > > .modify_qp = siw_verbs_modify_qp, > > .modify_srq = siw_modify_srq, > > .poll_cq = siw_poll_cq, > >diff --git a/drivers/infiniband/sw/siw/siw_verbs.c > >b/drivers/infiniband/sw/siw/siw_verbs.c > >index 03176a3d1e18..9e049241051e 100644 > >--- a/drivers/infiniband/sw/siw/siw_verbs.c > >+++ b/drivers/infiniband/sw/siw/siw_verbs.c > >@@ -34,44 +34,21 @@ static char ib_qp_state_to_string[IB_QPS_ERR + > >1][sizeof("RESET")] = { > > [IB_QPS_ERR] = "ERR" > > }; > > > >-static u32 siw_create_uobj(struct siw_ucontext *uctx, void *vaddr, > >u32 size) > >+void siw_mmap_free(struct rdma_user_mmap_entry *rdma_entry) > > { > >- struct siw_uobj *uobj; > >- struct xa_limit limit = XA_LIMIT(0, SIW_UOBJ_MAX_KEY); > >- 
u32 key; > >+ struct siw_user_mmap_entry *entry = > to_siw_mmap_entry(rdma_entry); > > > >- uobj = kzalloc(sizeof(*uobj), GFP_KERNEL); > >- if (!uobj) > >- return SIW_INVAL_UOBJ_KEY; > >- > >- if (xa_alloc_cyclic(&uctx->xa, &key, uobj, limit, > >&uctx->uobj_nextkey, > >- GFP_KERNEL) < 0) { > >- kfree(uobj); > >- return SIW_INVAL_UOBJ_KEY; > >- } > >- uobj->size = PAGE_ALIGN(size); > >- uobj->addr = vaddr; > >- > >- return key; > >-} > >- > >-static struct siw_uobj *siw_get_uobj(struct siw_ucontext *uctx, > >- unsigned long off, u32 size) > >-{ > >- struct siw_uobj *uobj = xa_load(&uctx->xa, off); > >- > >- if (uobj && uobj->size == size) > >- return uobj; > >- > >- return NULL; > >+ vfree((void *)entry->address); > >+ kfree(entry); > > } > > > > int siw_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma) { > > struct siw_ucontext *uctx = to_siw_ctx(ctx); > >- struct siw_uobj *uobj; > >- unsigned long off = vma->vm_pgoff; > >+ unsigned long off = vma->vm_pgoff << PAGE_SHIFT; > >+ struct rdma_user_mmap_entry *rdma_entry; > > int size = vma->vm_end - vma->vm_start; > >+ struct siw_user_mmap_entry *entry; > > int rv = -EINVAL; > > > > /* > >@@ -81,15 +58,26 @@ int siw_mmap(struct ib_ucontext *ctx, struct > >vm_area_struct *vma) > > pr_warn("siw: mmap not page aligned\n"); > > goto out; > > } > >- uobj = siw_get_uobj(uctx, off, size); > >- if (!uobj) { > >+ rdma_entry = rdma_user_mmap_entry_get(&uctx->base_ucontext, > off, > >+ size, vma); > >+ if (!rdma_entry) { > > siw_dbg(&uctx->sdev->base_dev, "mmap lookup failed: %lu, > %u\n", > > off, size); > > goto out; > > } > >- rv = remap_vmalloc_range(vma, uobj->addr, 0); > >- if (rv) > >+ entry = to_siw_mmap_entry(rdma_entry); > >+ if (entry->length != size) { > >+ siw_dbg(&uctx->sdev->base_dev, > >+ "key[%#lx] does not have valid length[%#x] > expected[%#llx]\n", > >+ off, size, entry->length); > >+ return -EINVAL; > >+ } > >+ > >+ rv = remap_vmalloc_range(vma, (void *)entry->address, 0); > >+ if (rv) { > > 
pr_warn("remap_vmalloc_range failed: %lu, %u\n", off, > size); > >+ rdma_user_mmap_entry_put(&uctx->base_ucontext, > rdma_entry); > >+ } > > out: > > return rv; > > } > >@@ -105,7 +93,7 @@ int siw_alloc_ucontext(struct ib_ucontext *base_ctx, > >struct ib_udata *udata) > > rv = -ENOMEM; > > goto err_out; > > } > >- xa_init_flags(&ctx->xa, XA_FLAGS_ALLOC); > >+ > > ctx->uobj_nextkey = 0; > > ctx->sdev = sdev; > > > >@@ -135,19 +123,7 @@ int siw_alloc_ucontext(struct ib_ucontext > >*base_ctx, struct ib_udata *udata) void siw_dealloc_ucontext(struct > >ib_ucontext *base_ctx) { > > struct siw_ucontext *uctx = to_siw_ctx(base_ctx); > >- void *entry; > >- unsigned long index; > > > >- /* > >- * Make sure all user mmap objects are gone. Since QP, CQ > >- * and SRQ destroy routines destroy related objects, nothing > >- * should be found here. > >- */ > >- xa_for_each(&uctx->xa, index, entry) { > >- kfree(xa_erase(&uctx->xa, index)); > >- pr_warn("siw: dropping orphaned uobj at %lu\n", index); > >- } > >- xa_destroy(&uctx->xa); > > atomic_dec(&uctx->sdev->num_ctx); > > } > > > >@@ -293,6 +269,28 @@ void siw_qp_put_ref(struct ib_qp *base_qp) > > siw_qp_put(to_siw_qp(base_qp)); > > } > > > >+static int siw_user_mmap_entry_insert(struct ib_ucontext *ucontext, > >+ u64 address, u64 length, > >+ u64 *key) > >+{ > >+ struct siw_user_mmap_entry *entry = kzalloc(sizeof(*entry), > >GFP_KERNEL); > >+ > >+ if (!entry) > >+ return -ENOMEM; > >+ > >+ entry->address = address; > >+ entry->length = length; > >+ > >+ *key = rdma_user_mmap_entry_insert(ucontext, &entry- > >rdma_entry, > >+ length); > >+ if (*key == RDMA_USER_MMAP_INVALID) { > >+ kfree(entry); > >+ return -ENOMEM; > >+ } > >+ > >+ return 0; > >+} > >+ > > /* > > * siw_create_qp() > > * > >@@ -317,6 +315,8 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, > > struct siw_cq *scq = NULL, *rcq = NULL; > > unsigned long flags; > > int num_sqe, num_rqe, rv = 0; > >+ u64 length; > >+ u64 key; > > > > siw_dbg(base_dev, "create new 
QP\n"); > > > >@@ -380,8 +380,8 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, > > spin_lock_init(&qp->orq_lock); > > > > qp->kernel_verbs = !udata; > >- qp->xa_sq_index = SIW_INVAL_UOBJ_KEY; > >- qp->xa_rq_index = SIW_INVAL_UOBJ_KEY; > >+ qp->sq_key = RDMA_USER_MMAP_INVALID; > >+ qp->rq_key = RDMA_USER_MMAP_INVALID; > > > > rv = siw_qp_add(sdev, qp); > > if (rv) > >@@ -459,26 +459,41 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, > > uresp.qp_id = qp_id(qp); > > > > if (qp->sendq) { > >- qp->xa_sq_index = > >- siw_create_uobj(uctx, qp->sendq, > >- num_sqe * sizeof(struct siw_sqe)); > >+ length = num_sqe * sizeof(struct siw_sqe); > >+ rv = siw_user_mmap_entry_insert(&uctx- > >base_ucontext, > >+ (uintptr_t)qp- > >sendq, > >+ length, &key); > >+ if (!rv) > > Shall be 'if (rv)' > The new 'siw_user_mmap_entry_insert()' seem to return zero for success. > Same issue afrer all other invocations of > siw_user_mmap_entry_insert() further down in that file. Perhaps this is causing the panic you're seeing ? > > >+ goto err_out_xa; > >+ > >+ /* If entry was inserted successfully, qp->sendq > >+ * will be freed by siw_mmap_free > >+ */ > >+ qp->sendq = NULL; > > qp->sendq points to the SQ array. Zeroing this pointer will leave > siw with no idea where the WQE's are. It will panic de-referencing [NULL + > current position in ring buffer]. Same for RQ, SRQ and CQ. Qp->sendq is only used in kernel mode, and only set to NULL is user-space mode Where it is allocated and mapped in user, so the user will be the one accessing the rings And not kernel, unless I'm missing something. 
> > > >+ qp->sq_key = key; > } > >+ > > if (qp->recvq) { > >- qp->xa_rq_index = > >- siw_create_uobj(uctx, qp->recvq, > >- num_rqe * sizeof(struct siw_rqe)); > >- } > >- if (qp->xa_sq_index == SIW_INVAL_UOBJ_KEY || > >- qp->xa_rq_index == SIW_INVAL_UOBJ_KEY) { > >- rv = -ENOMEM; > >- goto err_out_xa; > >+ length = num_rqe * sizeof(struct siw_rqe); > >+ rv = siw_user_mmap_entry_insert(&uctx- >base_ucontext, > >+ (uintptr_t)qp->recvq, > >+ length, &key); > >+ if (!rv) > >+ goto err_out_mmap_rem; > >+ > >+ /* If entry was inserted successfully, qp->recvq will > >+ * be freed by siw_mmap_free > >+ */ > >+ qp->recvq = NULL; > > see above same > > >+ qp->rq_key = key; > } > >- uresp.sq_key = qp->xa_sq_index << PAGE_SHIFT; > >- uresp.rq_key = qp->xa_rq_index << PAGE_SHIFT; > > Changes here would require changes to the siw user lib The key received from the API is shifted by PAGE_SHIFT already, so you're left with the same interface. There are no changes required for siw lib. > > >+ > >+ uresp.sq_key = qp->sq_key; > >+ uresp.rq_key = qp->rq_key; > > > > if (udata->outlen < sizeof(uresp)) { > > rv = -EINVAL; > >- goto err_out_xa; > >+ goto err_out_mmap_rem; > } > > rv = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); > > if (rv) > >@@ -496,21 +511,23 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, > > > > return qp->ib_qp; > > > >+err_out_mmap_rem: > >+ if (udata) { > >+ rdma_user_mmap_entry_remove(&uctx->base_ucontext, > qp->sq_key); > >+ rdma_user_mmap_entry_remove(&uctx->base_ucontext, > qp->rq_key); > >+ } > >+ > > err_out_xa: > > xa_erase(&sdev->qp_xa, qp_id(qp)); > > err_out: > > kfree(siw_base_qp); > > > > if (qp) { > >- if (qp->xa_sq_index != SIW_INVAL_UOBJ_KEY) > >- kfree(xa_erase(&uctx->xa, qp->xa_sq_index)); > >- if (qp->xa_rq_index != SIW_INVAL_UOBJ_KEY) > >- kfree(xa_erase(&uctx->xa, qp->xa_rq_index)); > >- > > vfree(qp->sendq); > > vfree(qp->recvq); > > kfree(qp); > > } > >+ > > atomic_dec(&sdev->num_qp); > > > > return ERR_PTR(rv); > >@@ -620,10 
+637,10 @@ int siw_destroy_qp(struct ib_qp *base_qp, struct > >ib_udata *udata) > > qp->attrs.flags |= SIW_QP_IN_DESTROY; > > qp->rx_stream.rx_suspend = 1; > > > >- if (uctx && qp->xa_sq_index != SIW_INVAL_UOBJ_KEY) > >- kfree(xa_erase(&uctx->xa, qp->xa_sq_index)); > >- if (uctx && qp->xa_rq_index != SIW_INVAL_UOBJ_KEY) > >- kfree(xa_erase(&uctx->xa, qp->xa_rq_index)); > >+ if (uctx) { > >+ rdma_user_mmap_entry_remove(&uctx->base_ucontext, > qp->sq_key); > >+ rdma_user_mmap_entry_remove(&uctx->base_ucontext, > qp->rq_key); > >+ } > > > > down_write(&qp->state_lock); > > > >@@ -993,8 +1010,8 @@ void siw_destroy_cq(struct ib_cq *base_cq, struct > >ib_udata *udata) > > > > siw_cq_flush(cq); > > > >- if (ctx && cq->xa_cq_index != SIW_INVAL_UOBJ_KEY) > >- kfree(xa_erase(&ctx->xa, cq->xa_cq_index)); > >+ if (ctx) > >+ rdma_user_mmap_entry_remove(&ctx->base_ucontext, > cq->cq_key); > > > > atomic_dec(&sdev->num_cq); > > > >@@ -1031,7 +1048,7 @@ int siw_create_cq(struct ib_cq *base_cq, const > >struct ib_cq_init_attr *attr, > > size = roundup_pow_of_two(size); > > cq->base_cq.cqe = size; > > cq->num_cqe = size; > >- cq->xa_cq_index = SIW_INVAL_UOBJ_KEY; > >+ cq->cq_key = RDMA_USER_MMAP_INVALID; > > > > if (!udata) { > > cq->kernel_verbs = 1; > >@@ -1057,16 +1074,21 @@ int siw_create_cq(struct ib_cq *base_cq, const > >struct ib_cq_init_attr *attr, > > struct siw_ucontext *ctx = > > rdma_udata_to_drv_context(udata, struct > siw_ucontext, > > base_ucontext); > >+ u64 length = size * sizeof(struct siw_cqe) + > >+ sizeof(struct siw_cq_ctrl); > > > >- cq->xa_cq_index = > >- siw_create_uobj(ctx, cq->queue, > >- size * sizeof(struct siw_cqe) + > >- sizeof(struct siw_cq_ctrl)); > >- if (cq->xa_cq_index == SIW_INVAL_UOBJ_KEY) { > >- rv = -ENOMEM; > >+ rv = siw_user_mmap_entry_insert(&ctx->base_ucontext, > >+ (uintptr_t)cq->queue, > >+ length, &cq->cq_key); > >+ if (!rv) > > if (rv) > > goto err_out; > >- } > >- uresp.cq_key = cq->xa_cq_index << PAGE_SHIFT; > >+ > >+ /* If entry was 
inserted successfully, cq->queue will be freed > >+ * by siw_mmap_free > >+ */ > >+ cq->queue = NULL; > > causes panic Should be set to null only if there is udata. I will fix. > >+ > >+ uresp.cq_key = cq->cq_key; > > uresp.cq_id = cq->id; > > uresp.num_cqe = size; > > > >@@ -1087,8 +1109,9 @@ int siw_create_cq(struct ib_cq *base_cq, const > >struct ib_cq_init_attr *attr, > > struct siw_ucontext *ctx = > > rdma_udata_to_drv_context(udata, struct > siw_ucontext, > > base_ucontext); > >- if (cq->xa_cq_index != SIW_INVAL_UOBJ_KEY) > >- kfree(xa_erase(&ctx->xa, cq->xa_cq_index)); > >+ if (udata) > >+ rdma_user_mmap_entry_remove(&ctx- > >base_ucontext, > >+ cq->cq_key); > > vfree(cq->queue); > > } > > atomic_dec(&sdev->num_cq); > >@@ -1490,7 +1513,7 @@ int siw_create_srq(struct ib_srq *base_srq, > > } > > srq->max_sge = attrs->max_sge; > > srq->num_rqe = roundup_pow_of_two(attrs->max_wr); > >- srq->xa_srq_index = SIW_INVAL_UOBJ_KEY; > >+ srq->srq_key = RDMA_USER_MMAP_INVALID; > > srq->limit = attrs->srq_limit; > > if (srq->limit) > > srq->armed = 1; > >@@ -1509,15 +1532,19 @@ int siw_create_srq(struct ib_srq *base_srq, > > } > > if (udata) { > > struct siw_uresp_create_srq uresp = {}; > >+ u64 length = srq->num_rqe * sizeof(struct siw_rqe); > > > >- srq->xa_srq_index = siw_create_uobj( > >- ctx, srq->recvq, srq->num_rqe * sizeof(struct > siw_rqe)); > >- > >- if (srq->xa_srq_index == SIW_INVAL_UOBJ_KEY) { > >- rv = -ENOMEM; > >+ rv = siw_user_mmap_entry_insert(&ctx->base_ucontext, > >+ (uintptr_t)srq->recvq, > >+ length, &srq->srq_key); > >+ if (!rv) > > if (rv) > > > goto err_out; > >- } > >- uresp.srq_key = srq->xa_srq_index; > >+ > >+ /* If entry was inserted successfully, srq->recvq will be freed > >+ * by siw_mmap_free > >+ */ > >+ srq->recvq = NULL; > > causes panic > > >+ uresp.srq_key = srq->srq_key; > > uresp.num_rqe = srq->num_rqe; > > > > if (udata->outlen < sizeof(uresp)) { @@ -1536,8 +1563,9 @@ > int > >siw_create_srq(struct ib_srq *base_srq, > > > > 
err_out: > > if (srq->recvq) { > >- if (ctx && srq->xa_srq_index != SIW_INVAL_UOBJ_KEY) > >- kfree(xa_erase(&ctx->xa, srq->xa_srq_index)); > >+ if (udata) > >+ rdma_user_mmap_entry_remove(&ctx- > >base_ucontext, > >+ srq->srq_key); > > vfree(srq->recvq); > > } > > atomic_dec(&sdev->num_srq); > >@@ -1623,8 +1651,9 @@ void siw_destroy_srq(struct ib_srq *base_srq, > >struct ib_udata *udata) > > rdma_udata_to_drv_context(udata, struct siw_ucontext, > > base_ucontext); > > > >- if (ctx && srq->xa_srq_index != SIW_INVAL_UOBJ_KEY) > >- kfree(xa_erase(&ctx->xa, srq->xa_srq_index)); > >+ if (ctx) > >+ rdma_user_mmap_entry_remove(&ctx->base_ucontext, > >+ srq->srq_key); > > > > vfree(srq->recvq); > > atomic_dec(&sdev->num_srq); > >diff --git a/drivers/infiniband/sw/siw/siw_verbs.h > >b/drivers/infiniband/sw/siw/siw_verbs.h > >index 1910869281cb..1a731989fad6 100644 > >--- a/drivers/infiniband/sw/siw/siw_verbs.h > >+++ b/drivers/infiniband/sw/siw/siw_verbs.h > >@@ -83,6 +83,7 @@ void siw_destroy_srq(struct ib_srq *base_srq, struct > >ib_udata *udata); int siw_post_srq_recv(struct ib_srq *base_srq, const > >struct ib_recv_wr *wr, > > const struct ib_recv_wr **bad_wr); int siw_mmap(struct > >ib_ucontext *ctx, struct vm_area_struct *vma); > >+void siw_mmap_free(struct rdma_user_mmap_entry *rdma_entry); > > void siw_qp_event(struct siw_qp *qp, enum ib_event_type type); void > >siw_cq_event(struct siw_cq *cq, enum ib_event_type type); void > >siw_srq_event(struct siw_srq *srq, enum ib_event_type type); > >-- > >2.14.5 > > > >