There is an old problem with io-wq cancellation where requests should be killed and are in io-wq but are not discoverable, e.g. in @next_hashed or @linked vars of io_worker_handle_work(). It adds some unreliability to individual request canellation, but also may potentially get __io_uring_cancel() stuck. For instance: 1) An __io_uring_cancel()'s cancellation round have not found any request but there are some as desribed. 2) __io_uring_cancel() goes to sleep 3) Then workers wake up and try to execute those hidden requests that happen to be unbound. As we already cancel all requests of io-wq there, set IO_WQ_BIT_EXIT in advance, so preventing 3) from executing unbound requests. The workers will initially break looping because of getting a signal as they are threads of the dying/exec()'ing user task. Cc: stable@xxxxxxxxxxxxxxx Signed-off-by: Pavel Begunkov <asml.silence@xxxxxxxxx> --- p.s. hard to tell the exact commit to blame due to all the changes in cancellation schemes fs/io-wq.c | 20 +++++++++----------- fs/io-wq.h | 2 +- fs/io_uring.c | 6 ++++++ 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/fs/io-wq.c b/fs/io-wq.c index 5361a9b4b47b..de9b7ba3ba01 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -979,13 +979,16 @@ static bool io_task_work_match(struct callback_head *cb, void *data) return cwd->wqe->wq == data; } +void io_wq_exit_start(struct io_wq *wq) +{ + set_bit(IO_WQ_BIT_EXIT, &wq->state); +} + static void io_wq_exit_workers(struct io_wq *wq) { struct callback_head *cb; int node; - set_bit(IO_WQ_BIT_EXIT, &wq->state); - if (!wq->task) return; @@ -1020,8 +1023,6 @@ static void io_wq_destroy(struct io_wq *wq) cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node); - io_wq_exit_workers(wq); - for_each_node(node) { struct io_wqe *wqe = wq->wqes[node]; struct io_cb_cancel_data match = { @@ -1036,16 +1037,13 @@ static void io_wq_destroy(struct io_wq *wq) kfree(wq); } -void io_wq_put(struct io_wq *wq) -{ - if (refcount_dec_and_test(&wq->refs)) - io_wq_destroy(wq); -} - void io_wq_put_and_exit(struct io_wq *wq) { + WARN_ON_ONCE(!test_bit(IO_WQ_BIT_EXIT, &wq->state)); + io_wq_exit_workers(wq); - io_wq_put(wq); + if (refcount_dec_and_test(&wq->refs)) + io_wq_destroy(wq); } static bool io_wq_worker_affinity(struct io_worker *worker, void *data) diff --git a/fs/io-wq.h b/fs/io-wq.h index 0e6d310999e8..af2df0680ee2 100644 --- a/fs/io-wq.h +++ b/fs/io-wq.h @@ -122,7 +122,7 @@ struct io_wq_data { }; struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data); -void io_wq_put(struct io_wq *wq); +void io_wq_exit_start(struct io_wq *wq); void io_wq_put_and_exit(struct io_wq *wq); void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work); diff --git a/fs/io_uring.c b/fs/io_uring.c index 5f82954004f6..6af8ca0cb01c 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -9078,6 +9078,9 @@ static void io_uring_cancel_sqpoll(struct io_sq_data *sqd) if (!current->io_uring) return; + if (tctx->io_wq) + io_wq_exit_start(tctx->io_wq); + WARN_ON_ONCE(!sqd || sqd->thread != current); atomic_inc(&tctx->in_idle); @@ -9112,6 +9115,9 @@ void __io_uring_cancel(struct files_struct *files) DEFINE_WAIT(wait); s64 inflight; + if (tctx->io_wq) + io_wq_exit_start(tctx->io_wq); + /* make sure overflow events are dropped */ atomic_inc(&tctx->in_idle); do { -- 2.31.1