If this is set, io_uring will never use an IPI to deliver a task_work notification. This can be used in the common case where a single task or thread communicates with the ring, and doesn't rely on io_uring_cqe_peek(). This provides a noticeable win in performance, both from eliminating the IPI itself, but also from avoiding interrupting the submitting task unnecessarily. Signed-off-by: Jens Axboe <axboe@xxxxxxxxx> --- fs/io_uring.c | 8 +++++--- include/uapi/linux/io_uring.h | 8 ++++++++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 7e9ac5fd3a8c..81f5b491c1a5 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -11696,9 +11696,10 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p, ctx->user = get_uid(current_user()); /* - * For SQPOLL, we just need a wakeup, always. + * For SQPOLL, we just need a wakeup, always. For !SQPOLL, if + * COOP_TASKRUN is set, then IPIs are never needed by the app. */ - if (ctx->flags & IORING_SETUP_SQPOLL) + if (ctx->flags & (IORING_SETUP_SQPOLL|IORING_SETUP_COOP_TASKRUN)) ctx->notify_method = TWA_SIGNAL_NO_IPI; else ctx->notify_method = TWA_SIGNAL; @@ -11800,7 +11801,8 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params) if (p.flags & ~(IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL | IORING_SETUP_SQ_AFF | IORING_SETUP_CQSIZE | IORING_SETUP_CLAMP | IORING_SETUP_ATTACH_WQ | - IORING_SETUP_R_DISABLED | IORING_SETUP_SUBMIT_ALL)) + IORING_SETUP_R_DISABLED | IORING_SETUP_SUBMIT_ALL | + IORING_SETUP_COOP_TASKRUN)) return -EINVAL; return io_uring_create(entries, &p, params); diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 5fb52bf32435..4654842ace88 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -104,6 +104,14 @@ enum { #define IORING_SETUP_ATTACH_WQ (1U << 5) /* attach to existing wq */ #define IORING_SETUP_R_DISABLED (1U << 6) /* start with ring disabled */ #define IORING_SETUP_SUBMIT_ALL (1U << 7) /* continue submit on error */ +/* + * Cooperative task running. When requests complete, they often require + * forcing the submitter to transition to the kernel to complete. If this + * flag is set, work will be done when the task transitions anyway, rather + * than force an inter-processor interrupt reschedule. This avoids interrupting + * a task running in userspace, and saves an IPI. + */ +#define IORING_SETUP_COOP_TASKRUN (1U << 8) enum { IORING_OP_NOP, -- 2.35.1