Howdy- > On Aug 18, 2020, at 8:05 AM, Leon Romanovsky <leon@xxxxxxxxxx> wrote: > > From: Jason Gunthorpe <jgg@xxxxxxxxxx> > > In almost all cases rdma_accept() is called under the handler_mutex by > ULPs from their handler callbacks. The one exception was ucma which did > not get the handler_mutex. It turns out that the RPC/RDMA server also does not invoke rdma_accept() from its CM event handler. See net/sunrpc/xprtrdma/svc_rdma_transport.c:svc_rdma_accept(). When lock debugging is enabled, the lockdep assertion in rdma_accept() fires on every RPC/RDMA connection. I'm not quite sure what to do about this. > To improve the understand-ability of the locking scheme obtain the mutex > for ucma as well. > > This improves how ucma works by allowing it to directly use handler_mutex > for some of its internal locking against the handler callbacks intead of > the global file->mut lock. > > There does not seem to be a serious bug here, other than a DISCONNECT event > can be delivered concurrently with accept succeeding. 
> > Signed-off-by: Jason Gunthorpe <jgg@xxxxxxxxxx> > Signed-off-by: Leon Romanovsky <leonro@xxxxxxxxxxxx> > --- > drivers/infiniband/core/cma.c | 25 ++++++++++++++++++++++--- > drivers/infiniband/core/ucma.c | 12 ++++++++---- > include/rdma/rdma_cm.h | 5 +++++ > 3 files changed, 35 insertions(+), 7 deletions(-) > > diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c > index 26de0dab60bb..78641858abe2 100644 > --- a/drivers/infiniband/core/cma.c > +++ b/drivers/infiniband/core/cma.c > @@ -4154,14 +4154,15 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv, > int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param, > const char *caller) > { > - struct rdma_id_private *id_priv; > + struct rdma_id_private *id_priv = > + container_of(id, struct rdma_id_private, id); > int ret; > > - id_priv = container_of(id, struct rdma_id_private, id); > + lockdep_assert_held(&id_priv->handler_mutex); > > rdma_restrack_set_task(&id_priv->res, caller); > > - if (!cma_comp(id_priv, RDMA_CM_CONNECT)) > + if (READ_ONCE(id_priv->state) != RDMA_CM_CONNECT) > return -EINVAL; > > if (!id->qp && conn_param) { > @@ -4214,6 +4215,24 @@ int __rdma_accept_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param, > } > EXPORT_SYMBOL(__rdma_accept_ece); > > +void rdma_lock_handler(struct rdma_cm_id *id) > +{ > + struct rdma_id_private *id_priv = > + container_of(id, struct rdma_id_private, id); > + > + mutex_lock(&id_priv->handler_mutex); > +} > +EXPORT_SYMBOL(rdma_lock_handler); > + > +void rdma_unlock_handler(struct rdma_cm_id *id) > +{ > + struct rdma_id_private *id_priv = > + container_of(id, struct rdma_id_private, id); > + > + mutex_unlock(&id_priv->handler_mutex); > +} > +EXPORT_SYMBOL(rdma_unlock_handler); > + > int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event) > { > struct rdma_id_private *id_priv; > diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c > index dd12931f3038..add1ece38739 
100644 > --- a/drivers/infiniband/core/ucma.c > +++ b/drivers/infiniband/core/ucma.c > @@ -1162,16 +1162,20 @@ static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf, > > if (cmd.conn_param.valid) { > ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param); > - mutex_lock(&file->mut); > mutex_lock(&ctx->mutex); > + rdma_lock_handler(ctx->cm_id); > ret = __rdma_accept_ece(ctx->cm_id, &conn_param, NULL, &ece); > - mutex_unlock(&ctx->mutex); > - if (!ret) > + if (!ret) { > + /* The uid must be set atomically with the handler */ > ctx->uid = cmd.uid; > - mutex_unlock(&file->mut); > + } > + rdma_unlock_handler(ctx->cm_id); > + mutex_unlock(&ctx->mutex); > } else { > mutex_lock(&ctx->mutex); > + rdma_lock_handler(ctx->cm_id); > ret = __rdma_accept_ece(ctx->cm_id, NULL, NULL, &ece); > + rdma_unlock_handler(ctx->cm_id); > mutex_unlock(&ctx->mutex); > } > ucma_put_ctx(ctx); > diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h > index cf5da2ae49bf..c1334c9a7aa8 100644 > --- a/include/rdma/rdma_cm.h > +++ b/include/rdma/rdma_cm.h > @@ -253,6 +253,8 @@ int rdma_listen(struct rdma_cm_id *id, int backlog); > int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param, > const char *caller); > > +void rdma_lock_handler(struct rdma_cm_id *id); > +void rdma_unlock_handler(struct rdma_cm_id *id); > int __rdma_accept_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param, > const char *caller, struct rdma_ucm_ece *ece); > > @@ -270,6 +272,9 @@ int __rdma_accept_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param, > * In the case of error, a reject message is sent to the remote side and the > * state of the qp associated with the id is modified to error, such that any > * previously posted receive buffers would be flushed. > + * > + * This function is for use by kernel ULPs and must be called from under the > + * handler callback. 
> */ > #define rdma_accept(id, conn_param) \ > __rdma_accept((id), (conn_param), KBUILD_MODNAME) > -- > 2.26.2 > -- Chuck Lever