By default, the POLL_ADD command does edge-triggered poll - if we get a non-zero mask on the initial poll attempt, we complete the request successfully. Support level-triggered poll by always waiting for a notification, regardless of whether or not the initial mask matches the file state. Signed-off-by: Jens Axboe <axboe@xxxxxxxxx> --- Wrote a test case and it seems to work as it should, and the usual regressions pass as well. diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 53e7dae92e42..1d176f935f5d 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -226,10 +226,13 @@ enum io_uring_op { * * IORING_POLL_UPDATE Update existing poll request, matching * sqe->addr as the old user_data field. + * + * IORING_POLL_ADD_LEVEL Level triggered poll. */ #define IORING_POLL_ADD_MULTI (1U << 0) #define IORING_POLL_UPDATE_EVENTS (1U << 1) #define IORING_POLL_UPDATE_USER_DATA (1U << 2) +#define IORING_POLL_ADD_LEVEL (1U << 3) /* * ASYNC_CANCEL flags. 
diff --git a/io_uring/poll.c b/io_uring/poll.c index ed9f74403d89..7a98d934428e 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -425,11 +425,13 @@ static int __io_arm_poll_handler(struct io_kiocb *req, atomic_set(&req->poll_refs, 1); mask = vfs_poll(req->file, &ipt->pt) & poll->events; - if (mask && (poll->events & EPOLLONESHOT)) { + if (mask && + ((poll->events & (EPOLLET|EPOLLONESHOT)) == (EPOLLET|EPOLLONESHOT))) { io_poll_remove_entries(req); /* no one else has access to the req, forget about the ref */ return mask; } + if (!mask && unlikely(ipt->error || !ipt->nr_entries)) { io_poll_remove_entries(req); if (!ipt->error) @@ -441,7 +443,7 @@ static int __io_arm_poll_handler(struct io_kiocb *req, io_poll_req_insert(req); spin_unlock(&ctx->completion_lock); - if (mask) { + if (mask && (poll->events & EPOLLET)) { /* can't multishot if failed, just queue the event we've got */ if (unlikely(ipt->error || !ipt->nr_entries)) poll->events |= EPOLLONESHOT; @@ -474,7 +476,7 @@ int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags) struct io_ring_ctx *ctx = req->ctx; struct async_poll *apoll; struct io_poll_table ipt; - __poll_t mask = POLLPRI | POLLERR; + __poll_t mask = POLLPRI | POLLERR | EPOLLET; int ret; if (!def->pollin && !def->pollout) @@ -636,7 +638,10 @@ static __poll_t io_poll_parse_events(const struct io_uring_sqe *sqe, #endif if (!(flags & IORING_POLL_ADD_MULTI)) events |= EPOLLONESHOT; - return demangle_poll(events) | (events & (EPOLLEXCLUSIVE|EPOLLONESHOT)); + if (!(flags & IORING_POLL_ADD_LEVEL)) + events |= EPOLLET; + return demangle_poll(events) | + (events & (EPOLLEXCLUSIVE|EPOLLONESHOT|EPOLLET)); } int io_poll_remove_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) @@ -677,7 +682,7 @@ int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (sqe->buf_index || sqe->off || sqe->addr) return -EINVAL; flags = READ_ONCE(sqe->len); - if (flags & ~IORING_POLL_ADD_MULTI) + if (flags & 
~(IORING_POLL_ADD_MULTI|IORING_POLL_ADD_LEVEL)) return -EINVAL; if ((flags & IORING_POLL_ADD_MULTI) && (req->flags & REQ_F_CQE_SKIP)) return -EINVAL; -- Jens Axboe