Re: [PATCH] io_uring: fix IO hang in io_wq_put_and_exit from do_exit()

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 2023/8/31 15:42, Ming Lei wrote:
> io_wq_put_and_exit() is called from do_exit(), but all requests in io_wq
> aren't cancelled in io_uring_cancel_generic() called from do_exit().
> Meantime io_wq IO code path may share resource with normal iopoll code
> path.
> 
> So if any HIPRI request is pending in io_wq_submit_work(), this request
> may not get resouce for moving on, given iopoll isn't possible in
> io_wq_put_and_exit().
> 
> The issue can be triggered when terminating 't/io_uring -n4 /dev/nullb0'
> with default null_blk parameters.
> 
> Fix it by always cancelling all requests in io_wq from io_uring_cancel_generic(),
> and this way is reasonable because io_wq destroying follows cancelling
> requests immediately. Based on one patch from Chengming.

Thanks a lot for this work. I'm still learning this code, so I may have some
silly questions below.

> 
> Closes: https://lore.kernel.org/linux-block/3893581.1691785261@xxxxxxxxxxxxxxxxxxxxxx/
> Reported-by: David Howells <dhowells@xxxxxxxxxx>
> Cc: Chengming Zhou <zhouchengming@xxxxxxxxxxxxx>,
> Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxx>
> ---
>  io_uring/io_uring.c | 40 ++++++++++++++++++++++++++++------------
>  1 file changed, 28 insertions(+), 12 deletions(-)
> 
> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
> index e7675355048d..18d5ab969c29 100644
> --- a/io_uring/io_uring.c
> +++ b/io_uring/io_uring.c
> @@ -144,7 +144,7 @@ struct io_defer_entry {
>  
>  static bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
>  					 struct task_struct *task,
> -					 bool cancel_all);
> +					 bool cancel_all, bool *wq_cancelled);
>  
>  static void io_queue_sqe(struct io_kiocb *req);
>  
> @@ -3049,7 +3049,7 @@ static __cold void io_ring_exit_work(struct work_struct *work)
>  		if (ctx->flags & IORING_SETUP_DEFER_TASKRUN)
>  			io_move_task_work_from_local(ctx);
>  
> -		while (io_uring_try_cancel_requests(ctx, NULL, true))
> +		while (io_uring_try_cancel_requests(ctx, NULL, true, NULL))
>  			cond_resched();
>  
>  		if (ctx->sq_data) {
> @@ -3231,12 +3231,13 @@ static __cold bool io_uring_try_cancel_iowq(struct io_ring_ctx *ctx)
>  
>  static __cold bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
>  						struct task_struct *task,
> -						bool cancel_all)
> +						bool cancel_all, bool *wq_cancelled)
>  {
> -	struct io_task_cancel cancel = { .task = task, .all = cancel_all, };
> +	struct io_task_cancel cancel = { .task = task, .all = true, };
>  	struct io_uring_task *tctx = task ? task->io_uring : NULL;
>  	enum io_wq_cancel cret;
>  	bool ret = false;
> +	bool wq_active = false;
>  
>  	/* set it so io_req_local_work_add() would wake us up */
>  	if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) {
> @@ -3249,7 +3250,7 @@ static __cold bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
>  		return false;
>  
>  	if (!task) {
> -		ret |= io_uring_try_cancel_iowq(ctx);
> +		wq_active = io_uring_try_cancel_iowq(ctx);
>  	} else if (tctx && tctx->io_wq) {
>  		/*
>  		 * Cancels requests of all rings, not only @ctx, but
> @@ -3257,11 +3258,20 @@ static __cold bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
>  		 */
>  		cret = io_wq_cancel_cb(tctx->io_wq, io_cancel_task_cb,
>  				       &cancel, true);
> -		ret |= (cret != IO_WQ_CANCEL_NOTFOUND);
> +		wq_active = (cret != IO_WQ_CANCEL_NOTFOUND);
>  	}
> +	ret |= wq_active;
> +	if (wq_cancelled)
> +		*wq_cancelled = !wq_active;

Here it seems "wq_cancelled" means there is no pending or running work anymore.

Why not just use the return value "loop" instead of introducing the new "wq_cancelled"?

If the return value "loop" is true, we know there are still requests that need to be
cancelled, so we will repeat the cancel process until no requests remain.

Ah, I guess you may want to cover one case: !wq_active && loop == true

>  
> -	/* SQPOLL thread does its own polling */
> -	if ((!(ctx->flags & IORING_SETUP_SQPOLL) && cancel_all) ||
> +	/*
> +	 * SQPOLL thread does its own polling
> +	 *
> +	 * io_wq may share IO resources(such as requests) with iopoll, so
> +	 * iopoll requests have to be reapped for providing forward
> +	 * progress to io_wq cancelling
> +	 */
> +	if (!(ctx->flags & IORING_SETUP_SQPOLL) ||
>  	    (ctx->sq_data && ctx->sq_data->thread == current)) {
>  		while (!wq_list_empty(&ctx->iopoll_list)) {
>  			io_iopoll_try_reap_events(ctx);
> @@ -3313,11 +3323,12 @@ __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
>  	atomic_inc(&tctx->in_cancel);
>  	do {
>  		bool loop = false;
> +		bool wq_cancelled;
>  
>  		io_uring_drop_tctx_refs(current);
>  		/* read completions before cancelations */
>  		inflight = tctx_inflight(tctx, !cancel_all);
> -		if (!inflight)
> +		if (!inflight && !tctx->io_wq)
>  			break;
>  

I think this inflight check should be placed after the cancel loop: the
cancel loop makes sure there are no more requests left to cancel, and then we can
loop on the inflight check to wait for all inflight requests to complete.

>  		if (!sqd) {
> @@ -3326,20 +3337,25 @@ __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
>  				if (node->ctx->sq_data)
>  					continue;
>  				loop |= io_uring_try_cancel_requests(node->ctx,
> -							current, cancel_all);
> +							current, cancel_all,
> +							&wq_cancelled);
>  			}
>  		} else {
>  			list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
>  				loop |= io_uring_try_cancel_requests(ctx,
>  								     current,
> -								     cancel_all);
> +								     cancel_all,
> +								     &wq_cancelled);
>  		}
>  
> -		if (loop) {
> +		if (!wq_cancelled || (inflight && loop)) {
>  			cond_resched();
>  			continue;
>  		}

So here do we just need to check "loop", and continue cancelling if "loop" is true?

Thanks!

>  
> +		if (!inflight)
> +			break;
> +
>  		prepare_to_wait(&tctx->wait, &wait, TASK_INTERRUPTIBLE);
>  		io_run_task_work();
>  		io_uring_drop_tctx_refs(current);



[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux