Re: [PATCH v2] SUNRPC: More fixes for backlog congestion

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




> On May 26, 2021, at 7:02 AM, trondmy@xxxxxxxxxx wrote:
> 
> From: Trond Myklebust <trond.myklebust@xxxxxxxxxxxxxxx>
> 
> Ensure that we fix the XPRT_CONGESTED starvation issue for RDMA as well
> as socket-based transports.
> Ensure we always initialise the request after waking up from the backlog
> list.

Out of interest, what prompted this commit: a code audit,
or observed misbehavior?


> Fixes: e877a88d1f06 ("SUNRPC in case of backlog, hand free slots directly to waiting task")
> Signed-off-by: Trond Myklebust <trond.myklebust@xxxxxxxxxxxxxxx>
> ---
> v2: Ensure we release the RDMA reply buffer
> 
> include/linux/sunrpc/xprt.h     |  2 ++
> net/sunrpc/xprt.c               | 58 ++++++++++++++++-----------------
> net/sunrpc/xprtrdma/transport.c | 12 +++----
> net/sunrpc/xprtrdma/verbs.c     | 18 ++++++++--
> net/sunrpc/xprtrdma/xprt_rdma.h |  1 +
> 5 files changed, 52 insertions(+), 39 deletions(-)
> 
> diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
> index d81fe8b364d0..61b622e334ee 100644
> --- a/include/linux/sunrpc/xprt.h
> +++ b/include/linux/sunrpc/xprt.h
> @@ -368,6 +368,8 @@ struct rpc_xprt *	xprt_alloc(struct net *net, size_t size,
> 				unsigned int num_prealloc,
> 				unsigned int max_req);
> void			xprt_free(struct rpc_xprt *);
> +void			xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task);
> +bool			xprt_wake_up_backlog(struct rpc_xprt *xprt, struct rpc_rqst *req);
> 
> static inline int
> xprt_enable_swap(struct rpc_xprt *xprt)
> diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
> index 5b3981fd3783..3509a7f139b9 100644
> --- a/net/sunrpc/xprt.c
> +++ b/net/sunrpc/xprt.c
> @@ -1607,11 +1607,18 @@ xprt_transmit(struct rpc_task *task)
> 	spin_unlock(&xprt->queue_lock);
> }
> 
> -static void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task)
> +static void xprt_complete_request_init(struct rpc_task *task)
> +{
> +	if (task->tk_rqstp)
> +		xprt_request_init(task);
> +}
> +
> +void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task)
> {
> 	set_bit(XPRT_CONGESTED, &xprt->state);
> -	rpc_sleep_on(&xprt->backlog, task, NULL);
> +	rpc_sleep_on(&xprt->backlog, task, xprt_complete_request_init);
> }
> +EXPORT_SYMBOL_GPL(xprt_add_backlog);
> 
> static bool __xprt_set_rq(struct rpc_task *task, void *data)
> {
> @@ -1619,14 +1626,13 @@ static bool __xprt_set_rq(struct rpc_task *task, void *data)
> 
> 	if (task->tk_rqstp == NULL) {
> 		memset(req, 0, sizeof(*req));	/* mark unused */
> -		task->tk_status = -EAGAIN;
> 		task->tk_rqstp = req;
> 		return true;
> 	}
> 	return false;
> }
> 
> -static bool xprt_wake_up_backlog(struct rpc_xprt *xprt, struct rpc_rqst *req)
> +bool xprt_wake_up_backlog(struct rpc_xprt *xprt, struct rpc_rqst *req)
> {
> 	if (rpc_wake_up_first(&xprt->backlog, __xprt_set_rq, req) == NULL) {
> 		clear_bit(XPRT_CONGESTED, &xprt->state);
> @@ -1634,6 +1640,7 @@ static bool xprt_wake_up_backlog(struct rpc_xprt *xprt, struct rpc_rqst *req)
> 	}
> 	return true;
> }
> +EXPORT_SYMBOL_GPL(xprt_wake_up_backlog);
> 
> static bool xprt_throttle_congested(struct rpc_xprt *xprt, struct rpc_task *task)
> {
> @@ -1643,7 +1650,7 @@ static bool xprt_throttle_congested(struct rpc_xprt *xprt, struct rpc_task *task
> 		goto out;
> 	spin_lock(&xprt->reserve_lock);
> 	if (test_bit(XPRT_CONGESTED, &xprt->state)) {
> -		rpc_sleep_on(&xprt->backlog, task, NULL);
> +		xprt_add_backlog(xprt, task);
> 		ret = true;
> 	}
> 	spin_unlock(&xprt->reserve_lock);
> @@ -1812,10 +1819,6 @@ xprt_request_init(struct rpc_task *task)
> 	struct rpc_xprt *xprt = task->tk_xprt;
> 	struct rpc_rqst	*req = task->tk_rqstp;
> 
> -	if (req->rq_task)
> -		/* Already initialized */
> -		return;
> -
> 	req->rq_task	= task;
> 	req->rq_xprt    = xprt;
> 	req->rq_buffer  = NULL;
> @@ -1876,10 +1879,8 @@ void xprt_retry_reserve(struct rpc_task *task)
> 	struct rpc_xprt *xprt = task->tk_xprt;
> 
> 	task->tk_status = 0;
> -	if (task->tk_rqstp != NULL) {
> -		xprt_request_init(task);
> +	if (task->tk_rqstp != NULL)
> 		return;
> -	}
> 
> 	task->tk_status = -EAGAIN;
> 	xprt_do_reserve(xprt, task);
> @@ -1904,24 +1905,21 @@ void xprt_release(struct rpc_task *task)
> 	}
> 
> 	xprt = req->rq_xprt;
> -	if (xprt) {
> -		xprt_request_dequeue_xprt(task);
> -		spin_lock(&xprt->transport_lock);
> -		xprt->ops->release_xprt(xprt, task);
> -		if (xprt->ops->release_request)
> -			xprt->ops->release_request(task);
> -		xprt_schedule_autodisconnect(xprt);
> -		spin_unlock(&xprt->transport_lock);
> -		if (req->rq_buffer)
> -			xprt->ops->buf_free(task);
> -		xdr_free_bvec(&req->rq_rcv_buf);
> -		xdr_free_bvec(&req->rq_snd_buf);
> -		if (req->rq_cred != NULL)
> -			put_rpccred(req->rq_cred);
> -		if (req->rq_release_snd_buf)
> -			req->rq_release_snd_buf(req);
> -	} else
> -		xprt = task->tk_xprt;
> +	xprt_request_dequeue_xprt(task);
> +	spin_lock(&xprt->transport_lock);
> +	xprt->ops->release_xprt(xprt, task);
> +	if (xprt->ops->release_request)
> +		xprt->ops->release_request(task);
> +	xprt_schedule_autodisconnect(xprt);
> +	spin_unlock(&xprt->transport_lock);
> +	if (req->rq_buffer)
> +		xprt->ops->buf_free(task);
> +	xdr_free_bvec(&req->rq_rcv_buf);
> +	xdr_free_bvec(&req->rq_snd_buf);
> +	if (req->rq_cred != NULL)
> +		put_rpccred(req->rq_cred);
> +	if (req->rq_release_snd_buf)
> +		req->rq_release_snd_buf(req);
> 
> 	task->tk_rqstp = NULL;
> 	if (likely(!bc_prealloc(req)))
> diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
> index 09953597d055..19a49d26b1e4 100644
> --- a/net/sunrpc/xprtrdma/transport.c
> +++ b/net/sunrpc/xprtrdma/transport.c
> @@ -520,9 +520,8 @@ xprt_rdma_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
> 	return;
> 
> out_sleep:
> -	set_bit(XPRT_CONGESTED, &xprt->state);
> -	rpc_sleep_on(&xprt->backlog, task, NULL);
> 	task->tk_status = -EAGAIN;
> +	xprt_add_backlog(xprt, task);
> }
> 
> /**
> @@ -537,10 +536,11 @@ xprt_rdma_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *rqst)
> 	struct rpcrdma_xprt *r_xprt =
> 		container_of(xprt, struct rpcrdma_xprt, rx_xprt);
> 
> -	memset(rqst, 0, sizeof(*rqst));
> -	rpcrdma_buffer_put(&r_xprt->rx_buf, rpcr_to_rdmar(rqst));
> -	if (unlikely(!rpc_wake_up_next(&xprt->backlog)))
> -		clear_bit(XPRT_CONGESTED, &xprt->state);
> +	rpcrdma_reply_put(&r_xprt->rx_buf, rpcr_to_rdmar(rqst));
> +	if (!xprt_wake_up_backlog(xprt, rqst)) {
> +		memset(rqst, 0, sizeof(*rqst));
> +		rpcrdma_buffer_put(&r_xprt->rx_buf, rpcr_to_rdmar(rqst));
> +	}
> }
> 
> static bool rpcrdma_check_regbuf(struct rpcrdma_xprt *r_xprt,
> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
> index 1e965a380896..649c23518ec0 100644
> --- a/net/sunrpc/xprtrdma/verbs.c
> +++ b/net/sunrpc/xprtrdma/verbs.c
> @@ -1200,6 +1200,20 @@ rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt)
> 	return mr;
> }
> 
> +/**
> + * rpcrdma_reply_put - Put reply buffers back into pool
> + * @buffers: buffer pool
> + * @req: object to return
> + *
> + */
> +void rpcrdma_reply_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req)
> +{
> +	if (req->rl_reply) {
> +		rpcrdma_rep_put(buffers, req->rl_reply);
> +		req->rl_reply = NULL;
> +	}
> +}
> +
> /**
>  * rpcrdma_buffer_get - Get a request buffer
>  * @buffers: Buffer pool from which to obtain a buffer
> @@ -1228,9 +1242,7 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
>  */
> void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req)
> {
> -	if (req->rl_reply)
> -		rpcrdma_rep_put(buffers, req->rl_reply);
> -	req->rl_reply = NULL;
> +	rpcrdma_reply_put(buffers, req);
> 
> 	spin_lock(&buffers->rb_lock);
> 	list_add(&req->rl_list, &buffers->rb_send_bufs);
> diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
> index 436ad7312614..5d231d94e944 100644
> --- a/net/sunrpc/xprtrdma/xprt_rdma.h
> +++ b/net/sunrpc/xprtrdma/xprt_rdma.h
> @@ -479,6 +479,7 @@ struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
> void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers,
> 			struct rpcrdma_req *req);
> void rpcrdma_rep_put(struct rpcrdma_buffer *buf, struct rpcrdma_rep *rep);
> +void rpcrdma_reply_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req);
> 
> bool rpcrdma_regbuf_realloc(struct rpcrdma_regbuf *rb, size_t size,
> 			    gfp_t flags);
> -- 
> 2.31.1
> 

--
Chuck Lever







[Index of Archives]     [Linux Filesystem Development]     [Linux USB Development]     [Linux Media Development]     [Video for Linux]     [Linux NILFS]     [Linux Audio Users]     [Yosemite Info]     [Linux SCSI]

  Powered by Linux