On 26/01/2021 15:28, Pavel Begunkov wrote:
> do not call blocking ops when !TASK_RUNNING; state=2 set at
> [<00000000ced9dbfc>] prepare_to_wait+0x1f4/0x3b0
> kernel/sched/wait.c:262
> WARNING: CPU: 1 PID: 19888 at kernel/sched/core.c:7853
> __might_sleep+0xed/0x100 kernel/sched/core.c:7848
> RIP: 0010:__might_sleep+0xed/0x100 kernel/sched/core.c:7848
> Call Trace:
>  __mutex_lock_common+0xc4/0x2ef0 kernel/locking/mutex.c:935
>  __mutex_lock kernel/locking/mutex.c:1103 [inline]
>  mutex_lock_nested+0x1a/0x20 kernel/locking/mutex.c:1118
>  io_wq_submit_work+0x39a/0x720 fs/io_uring.c:6411
>  io_run_cancel fs/io-wq.c:856 [inline]
>  io_wqe_cancel_pending_work fs/io-wq.c:990 [inline]
>  io_wq_cancel_cb+0x614/0xcb0 fs/io-wq.c:1027
>  io_uring_cancel_files fs/io_uring.c:8874 [inline]
>  io_uring_cancel_task_requests fs/io_uring.c:8952 [inline]
>  __io_uring_files_cancel+0x115d/0x19e0 fs/io_uring.c:9038
>  io_uring_files_cancel include/linux/io_uring.h:51 [inline]
>  do_exit+0x2e6/0x2490 kernel/exit.c:780
>  do_group_exit+0x168/0x2d0 kernel/exit.c:922
>  get_signal+0x16b5/0x2030 kernel/signal.c:2770
>  arch_do_signal_or_restart+0x8e/0x6a0 arch/x86/kernel/signal.c:811
>  handle_signal_work kernel/entry/common.c:147 [inline]
>  exit_to_user_mode_loop kernel/entry/common.c:171 [inline]
>  exit_to_user_mode_prepare+0xac/0x1e0 kernel/entry/common.c:201
>  __syscall_exit_to_user_mode_work kernel/entry/common.c:291 [inline]
>  syscall_exit_to_user_mode+0x48/0x190 kernel/entry/common.c:302
>  entry_SYSCALL_64_after_hwframe+0x44/0xa9
>
> Rewrite io_uring_cancel_files() to mimic __io_uring_task_cancel()'s
> counting scheme, so it does all the heavy work before setting
> TASK_UNINTERRUPTIBLE.
>
> Cc: stable@xxxxxxxxxxxxxxx # 5.9+
> Reported-by: syzbot+f655445043a26a7cfab8@xxxxxxxxxxxxxxxxxxxxxxxxx
> Signed-off-by: Pavel Begunkov <asml.silence@xxxxxxxxx>
> ---
>  fs/io_uring.c | 41 ++++++++++++++++++++++++-----------------
>  1 file changed, 24 insertions(+), 17 deletions(-)
>
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 09aada153a71..f3f2b37e7021 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -8873,30 +8873,33 @@ static void io_cancel_defer_files(struct io_ring_ctx *ctx,
>          }
>  }
>
> +static int io_uring_count_inflight(struct io_ring_ctx *ctx,
> +                                   struct task_struct *task,
> +                                   struct files_struct *files)
> +{
> +        struct io_kiocb *req;
> +        int cnt = 0;
> +
> +        spin_lock_irq(&ctx->inflight_lock);
> +        list_for_each_entry(req, &ctx->inflight_list, inflight_entry) {
> +                if (!io_match_task(req, task, files))

This condition should be inverted: as written it counts the requests that
do not match the task/files instead of the ones that do. Jens, please drop
this patch.

p.s. I wonder how the tests didn't catch that.
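For reference, a corrected helper would presumably count only matching
requests, along these lines (untested sketch, not what was posted):

static int io_uring_count_inflight(struct io_ring_ctx *ctx,
                                   struct task_struct *task,
                                   struct files_struct *files)
{
        struct io_kiocb *req;
        int cnt = 0;

        spin_lock_irq(&ctx->inflight_lock);
        list_for_each_entry(req, &ctx->inflight_list, inflight_entry) {
                /* count only requests belonging to this task/files */
                if (io_match_task(req, task, files))
                        cnt++;
        }
        spin_unlock_irq(&ctx->inflight_lock);
        return cnt;
}

With the '!' left in, a task whose inflight requests all match would read
back zero and leave the cancellation loop without waiting for them.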
> +                        cnt++;
> +        }
> +        spin_unlock_irq(&ctx->inflight_lock);
> +        return cnt;
> +}
> +
>  static void io_uring_cancel_files(struct io_ring_ctx *ctx,
>                                    struct task_struct *task,
>                                    struct files_struct *files)
>  {
>          while (!list_empty_careful(&ctx->inflight_list)) {
>                  struct io_task_cancel cancel = { .task = task, .files = files };
> -                struct io_kiocb *req;
>                  DEFINE_WAIT(wait);
> -                bool found = false;
> +                int inflight;
>
> -                spin_lock_irq(&ctx->inflight_lock);
> -                list_for_each_entry(req, &ctx->inflight_list, inflight_entry) {
> -                        if (!io_match_task(req, task, files))
> -                                continue;
> -                        found = true;
> -                        break;
> -                }
> -                if (found)
> -                        prepare_to_wait(&task->io_uring->wait, &wait,
> -                                        TASK_UNINTERRUPTIBLE);
> -                spin_unlock_irq(&ctx->inflight_lock);
> -
> -                /* We need to keep going until we don't find a matching req */
> -                if (!found)
> +                inflight = io_uring_count_inflight(ctx, task, files);
> +                if (!inflight)
>                          break;
>
>                  io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb, &cancel, true);
> @@ -8905,7 +8908,11 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
>                  io_cqring_overflow_flush(ctx, true, task, files);
>                  /* cancellations _may_ trigger task work */
>                  io_run_task_work();
> -                schedule();
> +
> +                prepare_to_wait(&task->io_uring->wait, &wait,
> +                                TASK_UNINTERRUPTIBLE);
> +                if (inflight == io_uring_count_inflight(ctx, task, files))
> +                        schedule();
>                  finish_wait(&task->io_uring->wait, &wait);
>          }
>  }

--
Pavel Begunkov