Hi, Can we queue up a backport of: commit 4c6e277c4cc4a6b3b2b9c66a7b014787ae757cc1 Author: Jens Axboe <axboe@xxxxxxxxx> Date: Wed Jul 1 11:29:10 2020 -0600 io_uring: abstract out task work running for 5.7 and 5.8 stable? It fixes a reported issue from Dave Chinner, since the abstraction also ensures that we always set the current task state appropriately before running task work. I've attached both a 5.8 and 5.7 port of the patch. Thanks! -- Jens Axboe
commit 4c6e277c4cc4a6b3b2b9c66a7b014787ae757cc1 Author: Jens Axboe <axboe@xxxxxxxxx> Date: Wed Jul 1 11:29:10 2020 -0600 io_uring: abstract out task work running Provide a helper to run task_work instead of checking and running manually in a bunch of different spots. While doing so, also move the task run state setting where we run the task work. Then we can move it out of the callback helpers. This also helps ensure we only do this once per task_work list run, not per task_work item. Suggested-by: Oleg Nesterov <oleg@xxxxxxxxxx> Signed-off-by: Jens Axboe <axboe@xxxxxxxxx> diff --git a/fs/io_uring.c b/fs/io_uring.c index 4e09af1d5d22..92bbbcff7777 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1692,6 +1692,17 @@ static int io_put_kbuf(struct io_kiocb *req) return cflags; } +static inline bool io_run_task_work(void) +{ + if (current->task_works) { + __set_current_state(TASK_RUNNING); + task_work_run(); + return true; + } + + return false; +} + static void io_iopoll_queue(struct list_head *again) { struct io_kiocb *req; @@ -1881,6 +1892,7 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events, */ if (!(++iters & 7)) { mutex_unlock(&ctx->uring_lock); + io_run_task_work(); mutex_lock(&ctx->uring_lock); } @@ -4421,7 +4433,6 @@ static void io_async_task_func(struct callback_head *cb) return; } - __set_current_state(TASK_RUNNING); if (io_sq_thread_acquire_mm(ctx, req)) { io_cqring_add_event(req, -EFAULT); goto end_req; @@ -6153,8 +6164,7 @@ static int io_sq_thread(void *data) if (!list_empty(&ctx->poll_list) || need_resched() || (!time_after(jiffies, timeout) && ret != -EBUSY && !percpu_ref_is_dying(&ctx->refs))) { - if (current->task_works) - task_work_run(); + io_run_task_work(); cond_resched(); continue; } @@ -6186,8 +6196,7 @@ static int io_sq_thread(void *data) finish_wait(&ctx->sqo_wait, &wait); break; } - if (current->task_works) { - task_work_run(); + if (io_run_task_work()) { finish_wait(&ctx->sqo_wait, &wait); continue; } @@ -6211,8 +6220,7 @@ static int io_sq_thread(void *data) timeout = jiffies + ctx->sq_thread_idle; } - if (current->task_works) - task_work_run(); + io_run_task_work(); set_fs(old_fs); io_sq_thread_drop_mm(ctx); @@ -6278,9 +6286,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, do { if (io_cqring_events(ctx, false) >= min_events) return 0; - if (!current->task_works) + if (!io_run_task_work()) break; - task_work_run(); } while (1); if (sig) { @@ -6302,8 +6309,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, prepare_to_wait_exclusive(&ctx->wait, &iowq.wq, TASK_INTERRUPTIBLE); /* make sure we run task_work before checking for signals */ - if (current->task_works) - task_work_run(); + if (io_run_task_work()) + continue; if (signal_pending(current)) { if (current->jobctl & JOBCTL_TASK_WORK) { spin_lock_irq(¤t->sighand->siglock); @@ -7691,8 +7698,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, int submitted = 0; struct fd f; - if (current->task_works) - task_work_run(); + io_run_task_work(); if (flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP)) return -EINVAL;
commit 4c6e277c4cc4a6b3b2b9c66a7b014787ae757cc1 Author: Jens Axboe <axboe@xxxxxxxxx> Date: Wed Jul 1 11:29:10 2020 -0600 io_uring: abstract out task work running Provide a helper to run task_work instead of checking and running manually in a bunch of different spots. While doing so, also move the task run state setting where we run the task work. Then we can move it out of the callback helpers. This also helps ensure we only do this once per task_work list run, not per task_work item. Suggested-by: Oleg Nesterov <oleg@xxxxxxxxxx> Signed-off-by: Jens Axboe <axboe@xxxxxxxxx> diff --git a/fs/io_uring.c b/fs/io_uring.c index 493e5047e67c..95bacab047dd 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1747,6 +1747,17 @@ static int io_put_kbuf(struct io_kiocb *req) return cflags; } +static inline bool io_run_task_work(void) +{ + if (current->task_works) { + __set_current_state(TASK_RUNNING); + task_work_run(); + return true; + } + + return false; +} + static void io_iopoll_queue(struct list_head *again) { struct io_kiocb *req; @@ -1936,6 +1947,7 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events, */ if (!(++iters & 7)) { mutex_unlock(&ctx->uring_lock); + io_run_task_work(); mutex_lock(&ctx->uring_lock); } @@ -4356,7 +4368,6 @@ static void io_async_task_func(struct callback_head *cb) kfree(apoll); if (!canceled) { - __set_current_state(TASK_RUNNING); if (io_sq_thread_acquire_mm(ctx, req)) { io_cqring_add_event(req, -EFAULT); goto end_req; @@ -6082,8 +6093,7 @@ static int io_sq_thread(void *data) if (!list_empty(&ctx->poll_list) || need_resched() || (!time_after(jiffies, timeout) && ret != -EBUSY && !percpu_ref_is_dying(&ctx->refs))) { - if (current->task_works) - task_work_run(); + io_run_task_work(); cond_resched(); continue; } @@ -6115,8 +6125,7 @@ static int io_sq_thread(void *data) finish_wait(&ctx->sqo_wait, &wait); break; } - if (current->task_works) { - task_work_run(); + if (io_run_task_work()) { finish_wait(&ctx->sqo_wait, &wait); continue; } @@ -6145,8 +6154,7 @@ static int io_sq_thread(void *data) timeout = jiffies + ctx->sq_thread_idle; } - if (current->task_works) - task_work_run(); + io_run_task_work(); io_sq_thread_drop_mm(ctx); revert_creds(old_cred); @@ -6211,9 +6219,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, do { if (io_cqring_events(ctx, false) >= min_events) return 0; - if (!current->task_works) + if (!io_run_task_work()) break; - task_work_run(); } while (1); if (sig) { @@ -6235,8 +6242,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, prepare_to_wait_exclusive(&ctx->wait, &iowq.wq, TASK_INTERRUPTIBLE); /* make sure we run task_work before checking for signals */ - if (current->task_works) - task_work_run(); + if (io_run_task_work()) + continue; if (signal_pending(current)) { if (current->jobctl & JOBCTL_TASK_WORK) { spin_lock_irq(¤t->sighand->siglock); @@ -7655,8 +7662,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, int submitted = 0; struct fd f; - if (current->task_works) - task_work_run(); + io_run_task_work(); if (flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP)) return -EINVAL;