->uring_lock is predominantly used for submission, even though it protects many other things like iopoll, registration, selected bufs, and more. It is placed together with ->cq_wait, which is poked on the completion and CQ waiting sides. Move them apart: ->uring_lock goes to the submission data, and ->cq_wait to the completion-related chunk. The latter requires some reshuffling so that everything needed by io_cqring_ev_posted*() is in one cacheline. Signed-off-by: Pavel Begunkov <asml.silence@xxxxxxxxx> --- fs/io_uring.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index e9bf26fbf65d..1b6cfc6b79c5 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -356,6 +356,8 @@ struct io_ring_ctx { /* submission data */ struct { + struct mutex uring_lock; + /* * Ring buffer of indices into array of io_uring_sqe, which is * mmapped by the application using the IORING_OFF_SQES offset. @@ -392,11 +394,6 @@ struct io_ring_ctx { unsigned sq_thread_idle; } ____cacheline_aligned_in_smp; - struct { - struct mutex uring_lock; - wait_queue_head_t cq_wait; - } ____cacheline_aligned_in_smp; - /* IRQ completion list, under ->completion_lock */ struct list_head locked_free_list; unsigned int locked_free_nr; @@ -412,12 +409,13 @@ struct io_ring_ctx { struct { unsigned cached_cq_tail; unsigned cq_entries; - atomic_t cq_timeouts; - unsigned cq_last_tm_flush; - unsigned cq_extra; + struct eventfd_ctx *cq_ev_fd; struct wait_queue_head poll_wait; + struct wait_queue_head cq_wait; + unsigned cq_extra; + atomic_t cq_timeouts; struct fasync_struct *cq_fasync; - struct eventfd_ctx *cq_ev_fd; + unsigned cq_last_tm_flush; } ____cacheline_aligned_in_smp; struct { -- 2.31.1