From: Jiufei Xue <jiufei.xue@xxxxxxxxxxxxxxxxx> Applications can set the EPOLLEXCLUSIVE flag in poll_events to avoid the thundering herd problem on accept. To make room for the flag, poll_events is widened from 16 to 32 bits. Signed-off-by: Jiufei Xue <jiufei.xue@xxxxxxxxxxxxxxxxx> --- fs/io_uring.c | 13 +++++++++---- include/uapi/linux/io_uring.h | 2 +- tools/io_uring/liburing.h | 2 +- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 47790a2..03951ec 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -4289,7 +4289,11 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt, pt->error = 0; poll->head = head; - add_wait_queue(head, &poll->wait); + + if (poll->events & EPOLLEXCLUSIVE) + add_wait_queue_exclusive(head, &poll->wait); + else + add_wait_queue(head, &poll->wait); } static void io_async_queue_proc(struct file *file, struct wait_queue_head *head, @@ -4602,7 +4606,7 @@ static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head, static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_poll_iocb *poll = &req->poll; - u16 events; + u32 events; if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; @@ -4612,7 +4616,8 @@ static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe return -EBADF; events = READ_ONCE(sqe->poll_events); - poll->events = demangle_poll(events) | EPOLLERR | EPOLLHUP; + poll->events = demangle_poll(events) | EPOLLERR | EPOLLHUP | + (events & EPOLLEXCLUSIVE); get_task_struct(current); req->task = current; @@ -8196,7 +8201,7 @@ static int __init io_uring_init(void) BUILD_BUG_SQE_ELEM(28, /* compat */ int, rw_flags); BUILD_BUG_SQE_ELEM(28, /* compat */ __u32, rw_flags); BUILD_BUG_SQE_ELEM(28, __u32, fsync_flags); - BUILD_BUG_SQE_ELEM(28, __u16, poll_events); + BUILD_BUG_SQE_ELEM(28, __u32, poll_events); BUILD_BUG_SQE_ELEM(28, __u32, sync_range_flags); BUILD_BUG_SQE_ELEM(28, __u32, msg_flags); BUILD_BUG_SQE_ELEM(28, __u32, 
timeout_flags); diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 92c2269..afc7edd 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -31,7 +31,7 @@ struct io_uring_sqe { union { __kernel_rwf_t rw_flags; __u32 fsync_flags; - __u16 poll_events; + __u32 poll_events; __u32 sync_range_flags; __u32 msg_flags; __u32 timeout_flags; diff --git a/tools/io_uring/liburing.h b/tools/io_uring/liburing.h index 5f305c8..094b9ec 100644 --- a/tools/io_uring/liburing.h +++ b/tools/io_uring/liburing.h @@ -145,7 +145,7 @@ static inline void io_uring_prep_write_fixed(struct io_uring_sqe *sqe, int fd, } static inline void io_uring_prep_poll_add(struct io_uring_sqe *sqe, int fd, - short poll_mask) + unsigned poll_mask) { memset(sqe, 0, sizeof(*sqe)); sqe->opcode = IORING_OP_POLL_ADD; -- 1.8.3.1