io_cqring_wake() needs a barrier for the waitqueue_active() check. However, in the case of io_req_local_work_add(), it first calls llist_add(), which implies an atomic operation, so there we can replace smp_mb() with smp_mb__after_atomic(). Signed-off-by: Pavel Begunkov <asml.silence@xxxxxxxxx> --- io_uring/io_uring.c | 5 +++-- io_uring/io_uring.h | 11 +++++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 5e7c086685bf..355fc1f3083d 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -1106,6 +1106,8 @@ static void io_req_local_work_add(struct io_kiocb *req) if (!llist_add(&req->io_task_work.node, &ctx->work_llist)) return; + /* need it for the following io_cqring_wake() */ + smp_mb__after_atomic(); if (unlikely(atomic_read(&req->task->io_uring->in_idle))) { io_move_task_work_from_local(ctx); @@ -1117,8 +1119,7 @@ static void io_req_local_work_add(struct io_kiocb *req) if (ctx->has_evfd) io_eventfd_signal(ctx); - io_cqring_wake(ctx); - + __io_cqring_wake(ctx); } static inline void __io_req_task_work_add(struct io_kiocb *req, bool allow_local) diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index 177bd55357d7..e733d31f31d2 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -203,17 +203,24 @@ static inline void io_commit_cqring(struct io_ring_ctx *ctx) smp_store_release(&ctx->rings->cq.tail, ctx->cached_cq_tail); } -static inline void io_cqring_wake(struct io_ring_ctx *ctx) +/* requires smp_mb() prior, see wq_has_sleeper() */ +static inline void __io_cqring_wake(struct io_ring_ctx *ctx) { /* * wake_up_all() may seem excessive, but io_wake_function() and * io_should_wake() handle the termination of the loop and only * wake as many waiters as we need to. 
*/ - if (wq_has_sleeper(&ctx->cq_wait)) + if (waitqueue_active(&ctx->cq_wait)) wake_up_all(&ctx->cq_wait); } +static inline void io_cqring_wake(struct io_ring_ctx *ctx) +{ + smp_mb(); + __io_cqring_wake(ctx); +} + static inline bool io_sqring_full(struct io_ring_ctx *ctx) { struct io_rings *r = ctx->rings; -- 2.37.3