If IORING_SETUP_COOP_TASKRUN is set to use cooperative scheduling for running task_work, then IORING_SETUP_TASKRUN_FLAG can be set so the application can tell if task_work is pending in the kernel for this ring. This allows use cases like io_uring_peek_cqe() to still function appropriately, or for the task to know when it would be useful to call io_uring_wait_cqe() to run pending events. Signed-off-by: Jens Axboe <axboe@xxxxxxxxx> --- fs/io_uring.c | 17 +++++++++++++---- include/uapi/linux/io_uring.h | 7 +++++++ 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 81f5b491c1a5..cab1736919c9 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2536,6 +2536,8 @@ static void ctx_flush_and_put(struct io_ring_ctx *ctx, bool *locked) { if (!ctx) return; + if (ctx->flags & IORING_SETUP_TASKRUN_FLAG) + atomic_andnot(IORING_SQ_TASKRUN, &ctx->rings->sq_flags); if (*locked) { io_submit_flush_completions(ctx); mutex_unlock(&ctx->uring_lock); @@ -2676,6 +2678,9 @@ static void io_req_task_work_add(struct io_kiocb *req, bool priority) if (running) return; + if (ctx->flags & IORING_SETUP_TASKRUN_FLAG) + atomic_or(IORING_SQ_TASKRUN, &ctx->rings->sq_flags); + if (likely(!task_work_add(tsk, &tctx->task_work, ctx->notify_method))) return; @@ -11699,10 +11704,14 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p, * For SQPOLL, we just need a wakeup, always. For !SQPOLL, if * COOP_TASKRUN is set, then IPIs are never needed by the app. */ - if (ctx->flags & (IORING_SETUP_SQPOLL|IORING_SETUP_COOP_TASKRUN)) + if (ctx->flags & (IORING_SETUP_SQPOLL|IORING_SETUP_COOP_TASKRUN)) { ctx->notify_method = TWA_SIGNAL_NO_IPI; - else + } else { + ret = -EINVAL; + if (ctx->flags & IORING_SETUP_TASKRUN_FLAG) + goto err; ctx->notify_method = TWA_SIGNAL; + } /* * This is just grabbed for accounting purposes. When a process exits, @@ -11802,10 +11811,10 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params) IORING_SETUP_SQ_AFF | IORING_SETUP_CQSIZE | IORING_SETUP_CLAMP | IORING_SETUP_ATTACH_WQ | IORING_SETUP_R_DISABLED | IORING_SETUP_SUBMIT_ALL | - IORING_SETUP_COOP_TASKRUN)) + IORING_SETUP_COOP_TASKRUN | IORING_SETUP_TASKRUN_FLAG)) return -EINVAL; - return io_uring_create(entries, &p, params); + return io_uring_create(entries, &p, params); } SYSCALL_DEFINE2(io_uring_setup, u32, entries, diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 4654842ace88..ad53def6abb8 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -112,6 +112,12 @@ enum { * a task running in userspace, and saves an IPI. */ #define IORING_SETUP_COOP_TASKRUN (1U << 8) +/* + * If COOP_TASKRUN is set, get notified if task work is available for + * running and a kernel transition would be needed to run it. This sets + * IORING_SQ_TASKRUN in the sq ring flags. Not valid with COOP_TASKRUN. + */ +#define IORING_SETUP_TASKRUN_FLAG (1U << 9) enum { IORING_OP_NOP, @@ -263,6 +269,7 @@ struct io_sqring_offsets { */ #define IORING_SQ_NEED_WAKEUP (1U << 0) /* needs io_uring_enter wakeup */ #define IORING_SQ_CQ_OVERFLOW (1U << 1) /* CQ ring is overflown */ +#define IORING_SQ_TASKRUN (1U << 2) /* task should enter the kernel */ struct io_cqring_offsets { __u32 head; -- 2.35.1