I've got curious about performance of the idea of having only 1 CQE per link (for the failed or last one). Tested it with a quick dirty patch doing submit-and-reap of a nops-link (patched for inline execution). 1) link size: 100 old: 206 ns per nop new: 144 ns per nop 2) link size: 10 old: 234 ns per nop new: 181 ns per nop 3) link size: 10, FORCE_ASYNC old: 667 ns per nop new: 569 ns per nop The patch below breaks sequences, linked_timeout and who knows what else. The first one requires synchronisation/atomic, so it's a bit in the way. I've been wondering, whether IOSQE_IO_DRAIN is popular and how much it's used. We can try to find tradeoff or even disable it with this feature. diff --git a/fs/io_uring.c b/fs/io_uring.c index 65a61b8b37c4..9ec29f01cfda 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1164,7 +1164,7 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force) return cqe != NULL; } -static void io_cqring_fill_event(struct io_kiocb *req, long res) +static void __io_cqring_fill_event(struct io_kiocb *req, long res) { struct io_ring_ctx *ctx = req->ctx; struct io_uring_cqe *cqe; @@ -1196,13 +1196,31 @@ static void io_cqring_fill_event(struct io_kiocb *req, long res) } } +static inline bool io_ignore_cqe(struct io_kiocb *req) +{ + if (!(req->ctx->flags & IORING_SETUP_BOXED_CQE)) + return false; + + return (req->flags & (REQ_F_LINK|REQ_F_FAIL_LINK)) == REQ_F_LINK; +} + +static void io_cqring_fill_event(struct io_kiocb *req, long res) +{ + if (io_ignore_cqe(req)) + return; + __io_cqring_fill_event(req, res); +} + static void io_cqring_add_event(struct io_kiocb *req, long res) { struct io_ring_ctx *ctx = req->ctx; unsigned long flags; + if (io_ignore_cqe(req)) + return; + spin_lock_irqsave(&ctx->completion_lock, flags); - io_cqring_fill_event(req, res); + __io_cqring_fill_event(req, res); io_commit_cqring(ctx); spin_unlock_irqrestore(&ctx->completion_lock, flags); @@ -7084,7 +7102,8 @@ static long io_uring_setup(u32 entries, struct 
io_uring_params __user *params) if (p.flags & ~(IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL | IORING_SETUP_SQ_AFF | IORING_SETUP_CQSIZE | - IORING_SETUP_CLAMP | IORING_SETUP_ATTACH_WQ) + IORING_SETUP_CLAMP | IORING_SETUP_ATTACH_WQ | + IORING_SETUP_BOXED_CQE)) return -EINVAL; ret = io_uring_create(entries, &p); diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 08891cc1c1e7..3d69369e252c 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -86,6 +86,7 @@ enum { #define IORING_SETUP_CQSIZE (1U << 3) /* app defines CQ size */ #define IORING_SETUP_CLAMP (1U << 4) /* clamp SQ/CQ ring sizes */ #define IORING_SETUP_ATTACH_WQ (1U << 5) /* attach to existing wq */ +#define IORING_SETUP_BOXED_CQE (1U << 6) /* single cqe per link */ enum { IORING_OP_NOP, -- Pavel Begunkov