Re: [PATCH 1/1] io-wq: forcefully cancel on io-wq destroy

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 30/03/2021 19:38, Pavel Begunkov wrote:
> [  491.222908] INFO: task thread-exit:2490 blocked for more than 122 seconds.
> [  491.222957] Call Trace:
> [  491.222967]  __schedule+0x36b/0x950
> [  491.222985]  schedule+0x68/0xe0
> [  491.222994]  schedule_timeout+0x209/0x2a0
> [  491.223003]  ? tlb_flush_mmu+0x28/0x140
> [  491.223013]  wait_for_completion+0x8b/0xf0
> [  491.223023]  io_wq_destroy_manager+0x24/0x60
> [  491.223037]  io_wq_put_and_exit+0x18/0x30
> [  491.223045]  io_uring_clean_tctx+0x76/0xa0
> [  491.223061]  __io_uring_files_cancel+0x1b9/0x2e0
> [  491.223068]  ? blk_finish_plug+0x26/0x40
> [  491.223085]  do_exit+0xc0/0xb40
> [  491.223099]  ? syscall_trace_enter.isra.0+0x1a1/0x1e0
> [  491.223109]  __x64_sys_exit+0x1b/0x20
> [  491.223117]  do_syscall_64+0x38/0x50
> [  491.223131]  entry_SYSCALL_64_after_hwframe+0x44/0xae
> [  491.223177] INFO: task iou-mgr-2490:2491 blocked for more than 122 seconds.
> [  491.223194] Call Trace:
> [  491.223198]  __schedule+0x36b/0x950
> [  491.223206]  ? pick_next_task_fair+0xcf/0x3e0
> [  491.223218]  schedule+0x68/0xe0
> [  491.223225]  schedule_timeout+0x209/0x2a0
> [  491.223236]  wait_for_completion+0x8b/0xf0
> [  491.223246]  io_wq_manager+0xf1/0x1d0
> [  491.223255]  ? recalc_sigpending+0x1c/0x60
> [  491.223265]  ? io_wq_cpu_online+0x40/0x40
> [  491.223272]  ret_from_fork+0x22/0x30
> 
> When an io-wq worker exits and sees IO_WQ_BIT_EXIT, it does not cancel
> the remaining requests but tries to execute them, hence we may wait for
> the exiting task for a long time until someone pushes it, e.g. with
> SIGKILL. Actively cancel pending work items on io-wq destruction.

The trace is from a slightly modified thread-exit test, and it doesn't
hang forever: it can be killed with ctrl+c or similar. Also, this patch
predictably breaks the thread-exit test.


> 
> note: io_run_cancel() moved up without any changes.
> 
> Signed-off-by: Pavel Begunkov <asml.silence@xxxxxxxxx>
> ---
>  fs/io-wq.c | 50 +++++++++++++++++++++++++++++++++++---------------
>  1 file changed, 35 insertions(+), 15 deletions(-)
> 
> diff --git a/fs/io-wq.c b/fs/io-wq.c
> index 7434eb40ca8c..5fa5e0fd40d6 100644
> --- a/fs/io-wq.c
> +++ b/fs/io-wq.c
> @@ -342,6 +342,20 @@ static void io_wait_on_hash(struct io_wqe *wqe, unsigned int hash)
>  	spin_unlock(&wq->hash->wait.lock);
>  }
>  
> +static struct io_wq_work *io_get_work_all(struct io_wqe *wqe)
> +	__must_hold(wqe->lock)
> +{
> +	struct io_wq_work_list *list = &wqe->work_list;
> +	struct io_wq_work_node *node = list->first;
> +	int i;
> +
> +	list->first = list->last = NULL;
> +	for (i = 0; i < IO_WQ_NR_HASH_BUCKETS; i++)
> +		wqe->hash_tail[i] = NULL;
> +
> +	return node ? container_of(node, struct io_wq_work, list) : NULL;
> +}
> +
>  static struct io_wq_work *io_get_next_work(struct io_wqe *wqe)
>  	__must_hold(wqe->lock)
>  {
> @@ -410,6 +424,17 @@ static void io_assign_current_work(struct io_worker *worker,
>  
>  static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work);
>  
> +static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe)
> +{
> +	struct io_wq *wq = wqe->wq;
> +
> +	do {
> +		work->flags |= IO_WQ_WORK_CANCEL;
> +		wq->do_work(work);
> +		work = wq->free_work(work);
> +	} while (work);
> +}
> +
>  static void io_worker_handle_work(struct io_worker *worker)
>  	__releases(wqe->lock)
>  {
> @@ -518,11 +543,17 @@ static int io_wqe_worker(void *data)
>  	}
>  
>  	if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
> +		struct io_wq_work *work, *next;
> +
>  		raw_spin_lock_irq(&wqe->lock);
> -		if (!wq_list_empty(&wqe->work_list))
> -			io_worker_handle_work(worker);
> -		else
> -			raw_spin_unlock_irq(&wqe->lock);
> +		work = io_get_work_all(wqe);
> +		raw_spin_unlock_irq(&wqe->lock);
> +
> +		while (work) {
> +			next = wq_next_work(work);
> +			io_run_cancel(work, wqe);
> +			work = next;
> +		}
>  	}
>  
>  	io_worker_exit(worker);
> @@ -748,17 +779,6 @@ static int io_wq_manager(void *data)
>  	do_exit(0);
>  }
>  
> -static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe)
> -{
> -	struct io_wq *wq = wqe->wq;
> -
> -	do {
> -		work->flags |= IO_WQ_WORK_CANCEL;
> -		wq->do_work(work);
> -		work = wq->free_work(work);
> -	} while (work);
> -}
> -
>  static void io_wqe_insert_work(struct io_wqe *wqe, struct io_wq_work *work)
>  {
>  	unsigned int hash;
> 

-- 
Pavel Begunkov



[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux