If we put the io-wq from io_uring, we really want it to exit. Provide a helper that does that for us. Couple that with not having the manager hold a reference to the 'wq' and the normal SQPOLL exit will tear down the io-wq context appropriately. On the io-wq side, our wq context is per task, so only the task itself is manipulating ->manager and hence it's safe to check and clear without any extra locking. We just need to hold a reference to the manager task, so that its task_struct stays around in case the manager exits. Signed-off-by: Jens Axboe <axboe@xxxxxxxxx> --- fs/io-wq.c | 29 +++++++++++++++++++---------- fs/io-wq.h | 1 + fs/io_uring.c | 2 +- 3 files changed, 21 insertions(+), 11 deletions(-) diff --git a/fs/io-wq.c b/fs/io-wq.c index 23f431747cd2..65ae35ca8dba 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -748,7 +748,7 @@ static int io_wq_manager(void *data) sprintf(buf, "iou-mgr-%d", wq->task_pid); set_task_comm(current, buf); current->flags |= PF_IO_WORKER; - wq->manager = current; + wq->manager = get_task_struct(current); complete(&wq->started); @@ -764,9 +764,7 @@ static int io_wq_manager(void *data) /* we might not ever have created any workers */ if (atomic_read(&wq->worker_refs)) wait_for_completion(&wq->worker_done); - wq->manager = NULL; complete(&wq->exited); - io_wq_put(wq); do_exit(0); } @@ -809,8 +807,6 @@ static int io_wq_fork_manager(struct io_wq *wq) return 0; reinit_completion(&wq->worker_done); - clear_bit(IO_WQ_BIT_EXIT, &wq->state); - refcount_inc(&wq->refs); current->flags |= PF_IO_WORKER; ret = io_wq_fork_thread(io_wq_manager, wq); current->flags &= ~PF_IO_WORKER; @@ -1082,6 +1078,16 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) return ERR_PTR(ret); } +static void io_wq_destroy_manager(struct io_wq *wq) +{ + if (wq->manager) { + wake_up_process(wq->manager); + wait_for_completion(&wq->exited); + put_task_struct(wq->manager); + wq->manager = NULL; + } +} + static void io_wq_destroy(struct io_wq *wq) { int node; @@ -1089,10 +1095,7 @@ static void 
io_wq_destroy(struct io_wq *wq) cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node); set_bit(IO_WQ_BIT_EXIT, &wq->state); - if (wq->manager) { - wake_up_process(wq->manager); - wait_for_completion(&wq->exited); - } + io_wq_destroy_manager(wq); rcu_read_lock(); for_each_node(node) @@ -1110,7 +1113,6 @@ static void io_wq_destroy(struct io_wq *wq) io_wq_put_hash(wq->hash); kfree(wq->wqes); kfree(wq); - } void io_wq_put(struct io_wq *wq) @@ -1119,6 +1121,13 @@ void io_wq_put(struct io_wq *wq) io_wq_destroy(wq); } +void io_wq_put_and_exit(struct io_wq *wq) +{ + set_bit(IO_WQ_BIT_EXIT, &wq->state); + io_wq_destroy_manager(wq); + io_wq_put(wq); +} + static bool io_wq_worker_affinity(struct io_worker *worker, void *data) { struct task_struct *task = worker->task; diff --git a/fs/io-wq.h b/fs/io-wq.h index b6ca12b60c35..f6ef433df8a8 100644 --- a/fs/io-wq.h +++ b/fs/io-wq.h @@ -114,6 +114,7 @@ struct io_wq_data { struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data); void io_wq_put(struct io_wq *wq); +void io_wq_put_and_exit(struct io_wq *wq); void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work); void io_wq_hash_work(struct io_wq_work *work, void *val); diff --git a/fs/io_uring.c b/fs/io_uring.c index 904bf0fecc36..cb65e54c1b09 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -8857,7 +8857,7 @@ void __io_uring_files_cancel(struct files_struct *files) if (files) { io_uring_remove_task_files(tctx); if (tctx->io_wq) { - io_wq_put(tctx->io_wq); + io_wq_put_and_exit(tctx->io_wq); tctx->io_wq = NULL; } } -- 2.30.1