This will be a prerequisite for adding multishot support, but can be used
with single shot support as well. Works like any other request that
supports provided buffers - set addr to NULL and ensure that
sqe->buf_group is set, and IOSQE_BUFFER_SELECT in sqe->flags. Then epoll
wait will pick a buffer from that group and store the events there.

Signed-off-by: Jens Axboe <axboe@xxxxxxxxx>
---
 io_uring/epoll.c | 31 +++++++++++++++++++++++++++----
 io_uring/opdef.c |  1 +
 2 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/io_uring/epoll.c b/io_uring/epoll.c
index 5a47f0cce647..134112e7a505 100644
--- a/io_uring/epoll.c
+++ b/io_uring/epoll.c
@@ -10,6 +10,7 @@
 #include <uapi/linux/io_uring.h>
 
 #include "io_uring.h"
+#include "kbuf.h"
 #include "epoll.h"
 #include "poll.h"
 
@@ -189,11 +190,13 @@ int io_epoll_wait_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 	struct io_epoll_wait *iew = io_kiocb_to_cmd(req, struct io_epoll_wait);
 
-	if (sqe->off || sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
+	if (sqe->off || sqe->rw_flags || sqe->splice_fd_in)
 		return -EINVAL;
 
 	iew->maxevents = READ_ONCE(sqe->len);
 	iew->events = u64_to_user_ptr(READ_ONCE(sqe->addr));
+	if (req->flags & REQ_F_BUFFER_SELECT && iew->events)
+		return -EINVAL;
 
 	iew->wait.flags = 0;
 	iew->wait.private = req;
@@ -207,22 +210,42 @@ int io_epoll_wait_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 int io_epoll_wait(struct io_kiocb *req, unsigned int issue_flags)
 {
 	struct io_epoll_wait *iew = io_kiocb_to_cmd(req, struct io_epoll_wait);
+	struct epoll_event __user *evs = iew->events;
 	struct io_ring_ctx *ctx = req->ctx;
+	int maxevents = iew->maxevents;
+	unsigned int cflags = 0;
 	int ret;
 
 	io_ring_submit_lock(ctx, issue_flags);
 
-	ret = epoll_wait(req->file, iew->events, iew->maxevents, NULL, &iew->wait);
+	if (io_do_buffer_select(req)) {
+		size_t len = iew->maxevents * sizeof(*evs);
+
+		evs = io_buffer_select(req, &len, 0);
+		if (!evs) {
+			ret = -ENOBUFS;
+			goto err;
+		}
+		maxevents = len / sizeof(*evs);
+	}
+
+	ret = epoll_wait(req->file, evs, maxevents, NULL, &iew->wait);
 	if (ret == -EIOCBQUEUED) {
+		io_kbuf_recycle(req, 0);
 		if (hlist_unhashed(&req->hash_node))
 			hlist_add_head(&req->hash_node, &ctx->epoll_list);
 		io_ring_submit_unlock(ctx, issue_flags);
 		return IOU_ISSUE_SKIP_COMPLETE;
-	} else if (ret < 0) {
+	} else if (ret > 0) {
+		cflags = io_put_kbuf(req, ret * sizeof(*evs), 0);
+	} else if (!ret) {
+		io_kbuf_recycle(req, 0);
+	} else {
+err:
 		req_set_fail(req);
 	}
 	hlist_del_init(&req->hash_node);
 	io_ring_submit_unlock(ctx, issue_flags);
-	io_req_set_res(req, ret, 0);
+	io_req_set_res(req, ret, cflags);
 	return IOU_OK;
 }
diff --git a/io_uring/opdef.c b/io_uring/opdef.c
index 44553a657476..04ff2b438531 100644
--- a/io_uring/opdef.c
+++ b/io_uring/opdef.c
@@ -520,6 +520,7 @@ const struct io_issue_def io_issue_defs[] = {
 		.needs_file		= 1,
 		.unbound_nonreg_file	= 1,
 		.audit_skip		= 1,
+		.buffer_select		= 1,
 #if defined(CONFIG_EPOLL)
 		.prep			= io_epoll_wait_prep,
 		.issue			= io_epoll_wait,
-- 
2.47.2