We still offload the cancelation to a workqueue, so as not to introduce dependencies between the exiting task waiting on cleanup, and that task needing to run task_work to complete the process. This means that once the final ring put is done, any request that was inflight and needed cancelation will be done as well. Notably, requests that hold references to files - once the ring fd close is done, we will have dropped any of those references too. Signed-off-by: Jens Axboe <axboe@xxxxxxxxx> --- include/linux/io_uring_types.h | 2 ++ io_uring/io_uring.c | 17 +++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index c17d2eedf478..79e223fd4733 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -450,6 +450,8 @@ struct io_ring_ctx { struct io_mapped_region param_region; /* just one zcrx per ring for now, will move to io_zcrx_ifq eventually */ struct io_mapped_region zcrx_region; + + struct completion *exit_comp; }; /* diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 984db01f5184..d9b65a322ae1 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -2894,6 +2894,7 @@ static __cold void io_ring_exit_work(struct work_struct *work) struct io_ring_ctx *ctx = container_of(work, struct io_ring_ctx, exit_work); unsigned long timeout = jiffies + HZ * 60 * 5; unsigned long interval = HZ / 20; + struct completion *exit_comp; struct io_tctx_exit exit; struct io_tctx_node *node; int ret; @@ -2958,6 +2959,10 @@ static __cold void io_ring_exit_work(struct work_struct *work) io_kworker_tw_end(); + exit_comp = READ_ONCE(ctx->exit_comp); + if (exit_comp) + complete(exit_comp); + init_completion(&exit.completion); init_task_work(&exit.task_work, io_tctx_exit_cb); exit.ctx = ctx; @@ -3020,9 +3025,21 @@ static __cold void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) static int io_uring_release(struct inode *inode, struct file *file) { struct io_ring_ctx *ctx = 
file->private_data; + DECLARE_COMPLETION_ONSTACK(exit_comp); file->private_data = NULL; + WRITE_ONCE(ctx->exit_comp, &exit_comp); io_ring_ctx_wait_and_kill(ctx); + + /* + * Wait for cancelation to run before the task exits + */ + do { + if (current->io_uring) + io_fallback_tw(current->io_uring, false); + cond_resched(); + } while (wait_for_completion_interruptible(&exit_comp)); + return 0; } -- 2.49.0