Add support for futex wake requests, which also modify the addr and check against it with an encoded operation as FUTEX_WAKE_OP does, but only operate on a single address, as it may be problematic to squeeze more than one into the SQE and io_kiocb. Signed-off-by: Pavel Begunkov <asml.silence@xxxxxxxxx> --- fs/io_uring.c | 48 +++++++++++++++++++++++++++++++++-- include/uapi/linux/io_uring.h | 10 +++++++- 2 files changed, 55 insertions(+), 3 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 2c6b14a3a4f6..99f4f8d9f685 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -668,6 +668,12 @@ struct io_unlink { struct io_futex { struct file *file; + unsigned int futex_op; + + unsigned int nr_wake; + unsigned int wake_op_arg; + unsigned int flags; + void __user *uaddr; }; struct io_completion { @@ -5874,12 +5880,50 @@ static int io_files_update(struct io_kiocb *req, unsigned int issue_flags) static int io_futex_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { - return -EINVAL; + struct io_futex *f = &req->futex; + u64 v; + + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; + if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT))) + return -EINVAL; + if (sqe->len) + return -EINVAL; + f->flags = READ_ONCE(sqe->futex_flags); + if (f->flags & ~IORING_FUTEX_SHARED) + return -EINVAL; + + v = READ_ONCE(sqe->off); + f->nr_wake = (u32)v; + f->wake_op_arg = (u32)(v >> 32); + f->futex_op = READ_ONCE(sqe->futex_op); + f->uaddr = u64_to_user_ptr(READ_ONCE(sqe->addr)); + return 0; } static int io_futex(struct io_kiocb *req, unsigned int issue_flags) { - return -EINVAL; + bool nonblock = issue_flags & IO_URING_F_NONBLOCK; + struct io_futex *f = &req->futex; + int ret; + + switch (f->futex_op) { + case IORING_FUTEX_WAKE_OP: + ret = futex_wake_op_single(f->uaddr, f->nr_wake, f->wake_op_arg, + !(f->flags & IORING_FUTEX_SHARED), + nonblock); + /* retry from blocking context */ + if (nonblock && ret == -EAGAIN) + return -EAGAIN; + break; + 
default: + ret = -EINVAL; + } + + if (ret < 0) + req_set_fail(req); + __io_req_complete(req, issue_flags, ret, 0); + return 0; } static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 6a1af5bb2ddf..6fa5a6e59934 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -17,7 +17,10 @@ struct io_uring_sqe { __u8 opcode; /* type of operation for this sqe */ __u8 flags; /* IOSQE_ flags */ - __u16 ioprio; /* ioprio for the request */ + union { + __u16 ioprio; /* ioprio for the request */ + __u16 futex_op; /* futex operation */ + } __attribute__((packed)); __s32 fd; /* file descriptor to do IO on */ union { __u64 off; /* offset into file */ @@ -161,6 +164,11 @@ enum { */ #define SPLICE_F_FD_IN_FIXED (1U << 31) /* the last bit of __u32 */ +/* + * sqe->futex_flags + */ +#define IORING_FUTEX_SHARED (1U << 0) + /* * POLL_ADD flags. Note that since sqe->poll_events is the flag space, the * command flags for POLL_ADD are stored in sqe->len. -- 2.31.1