This fixes various issues found when testing disconnections and other various fault injections under a KASAN-enabled kernel. - Swap the names of `rpcrdma_ep_destroy` and `rpcrdma_ep_put` to reflect what the functions are doing. - In the cleanup of `rpcrdma_ep_create` do `kfree` on the EP only for the case where no one knows about it, and avoid a double free (what `rpcrdma_ep_destroy` did, followed by `kfree`). No need to set `r_xprt->rx_ep` to NULL because we are doing that only in the success path. - Don't up the EP ref in `RDMA_CM_EVENT_ESTABLISHED` but do it sooner before `rdma_connect`. This is needed so that an early wake up of `wait_event_interruptible` in `rpcrdma_xprt_connect` in case of a failure does not see a freed EP. - Still try to follow the rule that the last decref of EP performs `rdma_destroy_id(id)`. - Only path to disengage an EP is `rpcrdma_xprt_disconnect`, whether it is actually connected or not. This makes the error paths of `rpcrdma_xprt_connect` clearer. - Add a mutex in `rpcrdma_ep_destroy` to guard against concurrent calls to `rpcrdma_xprt_disconnect` coming from either `rpcrdma_xprt_connect` or `xprt_rdma_close`. Signed-off-by: Dan Aloni <dan@xxxxxxxxxxxx> --- net/sunrpc/xprtrdma/transport.c | 2 ++ net/sunrpc/xprtrdma/verbs.c | 50 ++++++++++++++++++++++----------- net/sunrpc/xprtrdma/xprt_rdma.h | 1 + 3 files changed, 37 insertions(+), 16 deletions(-) diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 0c4af7f5e241..50c28be6b694 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -350,6 +350,8 @@ xprt_setup_rdma(struct xprt_create *args) xprt_rdma_format_addresses(xprt, sap); new_xprt = rpcx_to_rdmax(xprt); + mutex_init(&new_xprt->rx_flush); + rc = rpcrdma_buffer_create(new_xprt); if (rc) { xprt_rdma_free_addresses(xprt); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 2ae348377806..c66871bbb894 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -84,7 +84,7 @@ static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep); static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt); static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt); static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt); -static int rpcrdma_ep_destroy(struct rpcrdma_ep *ep); +static int rpcrdma_ep_put(struct rpcrdma_ep *ep); static struct rpcrdma_regbuf * rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction, gfp_t flags); @@ -99,6 +99,9 @@ static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt) { struct rdma_cm_id *id = r_xprt->rx_ep->re_id; + if (!id->qp) + return; + /* Flush Receives, then wait for deferred Reply work * to complete. */ @@ -266,7 +269,6 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) xprt_force_disconnect(xprt); goto disconnected; case RDMA_CM_EVENT_ESTABLISHED: - kref_get(&ep->re_kref); ep->re_connect_status = 1; rpcrdma_update_cm_private(ep, &event->param.conn); trace_xprtrdma_inline_thresh(ep); @@ -289,7 +291,8 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) ep->re_connect_status = -ECONNABORTED; disconnected: xprt_force_disconnect(xprt); - return rpcrdma_ep_destroy(ep); + wake_up_all(&ep->re_connect_wait); + return rpcrdma_ep_put(ep); default: break; } @@ -345,11 +348,11 @@ static struct rdma_cm_id *rpcrdma_create_id(struct rpcrdma_xprt *r_xprt, return ERR_PTR(rc); } -static void rpcrdma_ep_put(struct kref *kref) +static void rpcrdma_ep_destroy(struct kref *kref) { struct rpcrdma_ep *ep = container_of(kref, struct rpcrdma_ep, re_kref); - if (ep->re_id->qp) { + if (ep->re_id && ep->re_id->qp) { rdma_destroy_qp(ep->re_id); ep->re_id->qp = NULL; } @@ -373,9 +376,9 @@ static void rpcrdma_ep_put(struct kref *kref) * %0 if @ep still has a positive kref count, or * %1 if @ep was destroyed successfully. */ -static int rpcrdma_ep_destroy(struct rpcrdma_ep *ep) +static int rpcrdma_ep_put(struct rpcrdma_ep *ep) { - return kref_put(&ep->re_kref, rpcrdma_ep_put); + return kref_put(&ep->re_kref, rpcrdma_ep_destroy); } static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) @@ -492,11 +495,12 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) return 0; out_destroy: - rpcrdma_ep_destroy(ep); + rpcrdma_ep_put(ep); rdma_destroy_id(id); + return rc; + out_free: kfree(ep); - r_xprt->rx_ep = NULL; return rc; } @@ -510,6 +514,7 @@ int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt) { struct rpc_xprt *xprt = &r_xprt->rx_xprt; struct rpcrdma_ep *ep; + struct rdma_cm_id *id; int rc; retry: @@ -518,6 +523,7 @@ int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt) if (rc) return rc; ep = r_xprt->rx_ep; + id = ep->re_id; ep->re_connect_status = 0; xprt_clear_connected(xprt); @@ -529,7 +535,10 @@ int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt) if (rc) goto out; - rc = rdma_connect(ep->re_id, &ep->re_remote_cma); + /* From this point on, CM events can discard our EP */ + kref_get(&ep->re_kref); + + rc = rdma_connect(id, &ep->re_remote_cma); if (rc) goto out; @@ -546,14 +555,17 @@ int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt) rc = rpcrdma_reqs_setup(r_xprt); if (rc) { - rpcrdma_xprt_disconnect(r_xprt); + ep->re_connect_status = rc; goto out; } + rpcrdma_mrs_create(r_xprt); + trace_xprtrdma_connect(r_xprt, 0); + return rc; out: - if (rc) - ep->re_connect_status = rc; + ep->re_connect_status = rc; + rpcrdma_xprt_disconnect(r_xprt); trace_xprtrdma_connect(r_xprt, rc); return rc; } @@ -570,12 +582,15 @@ int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt) */ void rpcrdma_xprt_disconnect(struct rpcrdma_xprt *r_xprt) { - struct rpcrdma_ep *ep = r_xprt->rx_ep; + struct rpcrdma_ep *ep; struct rdma_cm_id *id; int rc; + mutex_lock(&r_xprt->rx_flush); + + ep = r_xprt->rx_ep; if (!ep) - return; + goto out; id = ep->re_id; rc = rdma_disconnect(id); @@ -587,10 +602,13 @@ void rpcrdma_xprt_disconnect(struct rpcrdma_xprt *r_xprt) rpcrdma_mrs_destroy(r_xprt); rpcrdma_sendctxs_destroy(r_xprt); - if (rpcrdma_ep_destroy(ep)) + if (rpcrdma_ep_put(ep)) rdma_destroy_id(id); r_xprt->rx_ep = NULL; + +out: + mutex_unlock(&r_xprt->rx_flush); } /* Fixed-size circular FIFO queue. This implementation is wait-free and diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 0a16fdb09b2c..288645a9437f 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -417,6 +417,7 @@ struct rpcrdma_xprt { struct delayed_work rx_connect_worker; struct rpc_timeout rx_timeout; struct rpcrdma_stats rx_stats; + struct mutex rx_flush; }; #define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, rx_xprt) -- 2.25.4