On 6/18/24 19:48, Jens Axboe wrote:
Use the exported helper for queueing task_work for message passing,
rather than rolling our own.
Note that this is only done for strict data messages for now, file
descriptor passing messages still rely on the kernel task_work. It could
get converted at some point if it's performance critical.
This improves peak performance of message passing by about 5x in some
basic testing, with 2 threads just sending messages to each other.
Before this change, it was capped at around 700K/sec, with the change
it's at over 4M/sec.
Signed-off-by: Jens Axboe <axboe@xxxxxxxxx>
---
io_uring/msg_ring.c | 90 +++++++++++++++++++++++----------------------
1 file changed, 47 insertions(+), 43 deletions(-)
diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c
index 9fdb0cc19bfd..ad7d67d44461 100644
--- a/io_uring/msg_ring.c
+++ b/io_uring/msg_ring.c
@@ -13,7 +13,6 @@
#include "filetable.h"
#include "msg_ring.h"
-
/* All valid masks for MSG_RING */
#define IORING_MSG_RING_MASK (IORING_MSG_RING_CQE_SKIP | \
IORING_MSG_RING_FLAGS_PASS)
@@ -71,54 +70,43 @@ static inline bool io_msg_need_remote(struct io_ring_ctx *target_ctx)
return target_ctx->task_complete;
}
-static int io_msg_exec_remote(struct io_kiocb *req, task_work_func_t func)
+static void io_msg_tw_complete(struct io_kiocb *req, struct io_tw_state *ts)
{
- struct io_ring_ctx *ctx = req->file->private_data;
- struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
- struct task_struct *task = READ_ONCE(ctx->submitter_task);
Not about this series particularly, but sounds like msg requests
should be REQ_F_INFLIGHT, but there is a chance lazy file assignment
is enough.
-
- if (unlikely(!task))
- return -EOWNERDEAD;
+ struct io_ring_ctx *ctx = req->ctx;
- init_task_work(&msg->tw, func);
- if (task_work_add(task, &msg->tw, TWA_SIGNAL))
- return -EOWNERDEAD;
+ io_add_aux_cqe(ctx, req->cqe.user_data, req->cqe.res, req->cqe.flags);
+ kmem_cache_free(req_cachep, req);
+ percpu_ref_put(&ctx->refs);
+}
- return IOU_ISSUE_SKIP_COMPLETE;
+static void io_msg_remote_post(struct io_ring_ctx *ctx, struct io_kiocb *req,
+ int res, u32 cflags, u64 user_data)
+{
+ req->cqe.user_data = user_data;
+ io_req_set_res(req, res, cflags);
+ percpu_ref_get(&ctx->refs);
+ req->ctx = ctx;
+ req->task = READ_ONCE(ctx->submitter_task);
Missing a null check, apart from that the patchset looks right
+ req->io_task_work.func = io_msg_tw_complete;
+ io_req_task_work_add_remote(req, ctx, IOU_F_TWQ_LAZY_WAKE);
}
--
Pavel Begunkov