This works just like openat(2), except it can be performed async. For
the normal case of a non-blocking path lookup this will complete
inline. If we have to do IO to perform the open, it'll be done from
async context.

Signed-off-by: Jens Axboe <axboe@xxxxxxxxx>
---
 fs/io_uring.c                 | 133 +++++++++++++++++++++++++++++++++-
 include/uapi/linux/io_uring.h |   2 +
 2 files changed, 133 insertions(+), 2 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 1822bf9aba12..53ff67ab5c4b 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -70,6 +70,8 @@
 #include <linux/sizes.h>
 #include <linux/hugetlb.h>
 #include <linux/highmem.h>
+#include <linux/namei.h>
+#include <linux/fsnotify.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/io_uring.h>
@@ -353,6 +355,15 @@ struct io_sr_msg {
 	int				msg_flags;
 };
 
+struct io_open {
+	struct file			*file;
+	int				dfd;
+	umode_t				mode;
+	const char __user		*fname;
+	struct filename			*filename;
+	int				flags;
+};
+
 struct io_async_connect {
 	struct sockaddr_storage		address;
 };
@@ -371,12 +382,17 @@ struct io_async_rw {
 	ssize_t				size;
 };
 
+struct io_async_open {
+	struct filename			*filename;
+};
+
 struct io_async_ctx {
 	union {
 		struct io_async_rw	rw;
 		struct io_async_msghdr	msg;
 		struct io_async_connect	connect;
 		struct io_timeout_data	timeout;
+		struct io_async_open	open;
 	};
 };
 
@@ -397,6 +413,7 @@ struct io_kiocb {
 		struct io_timeout	timeout;
 		struct io_connect	connect;
 		struct io_sr_msg	sr_msg;
+		struct io_open		open;
 	};
 
 	struct io_async_ctx		*io;
@@ -2135,6 +2152,104 @@ static int io_fallocate(struct io_kiocb *req, struct io_kiocb **nxt,
 	return 0;
 }
 
+/*
+ * Read the openat arguments from the SQE and copy the pathname in from
+ * userspace. On success req->open.filename holds the getname() result;
+ * on failure it is reset to NULL and the error is returned.
+ */
+static int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+	int ret;
+
+	if (sqe->ioprio || sqe->buf_index)
+		return -EINVAL;
+
+	req->open.dfd = READ_ONCE(sqe->fd);
+	req->open.mode = READ_ONCE(sqe->len);
+	req->open.fname = u64_to_user_ptr(READ_ONCE(sqe->addr));
+	req->open.flags = READ_ONCE(sqe->open_flags);
+	/* the openat(2) syscall entry ORs in O_LARGEFILE; do the same */
+	if (force_o_largefile())
+		req->open.flags |= O_LARGEFILE;
+
+	req->open.filename = getname(req->open.fname);
+	if (IS_ERR(req->open.filename)) {
+		ret = PTR_ERR(req->open.filename);
+		req->open.filename = NULL;
+		return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Work handler installed by io_openat() on -EAGAIN: run the request, then
+ * drop the filename. The pointer is fetched before io_wq_submit_work()
+ * is called, so req is not dereferenced afterwards.
+ */
+static void io_openat_async(struct io_wq_work **workptr)
+{
+	struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
+	struct filename *filename = req->open.filename;
+
+	io_wq_submit_work(workptr);
+	putname(filename);
+}
+
+/*
+ * Perform the open. With force_nonblock set, LOOKUP_NONBLOCK is added to
+ * the lookup flags. If do_filp_open() returns -EAGAIN, the work handler
+ * is switched to io_openat_async() and -EAGAIN is returned without
+ * posting a completion. Otherwise a completion carrying the new fd or a
+ * negative error is posted and 0 is returned.
+ */
+static int io_openat(struct io_kiocb *req, struct io_kiocb **nxt,
+		     bool force_nonblock)
+{
+	struct open_flags op;
+	struct open_how how;
+	struct file *file;
+	int ret;
+
+	how = build_open_how(req->open.flags, req->open.mode);
+	ret = build_open_flags(&how, &op);
+	if (ret)
+		goto err;
+	if (force_nonblock)
+		op.lookup_flags |= LOOKUP_NONBLOCK;
+
+	ret = get_unused_fd_flags(how.flags);
+	if (ret < 0)
+		goto err;
+
+	file = do_filp_open(req->open.dfd, req->open.filename, &op);
+	if (IS_ERR(file)) {
+		put_unused_fd(ret);
+		ret = PTR_ERR(file);
+		if (ret == -EAGAIN) {
+			req->work.flags |= IO_WQ_WORK_NEEDS_FILES;
+			req->work.func = io_openat_async;
+			return -EAGAIN;
+		}
+	} else {
+		fsnotify_open(file);
+		fd_install(ret, file);
+	}
+err:
+	/*
+	 * NOTE(review): in worker context the putname() is presumably left
+	 * to io_openat_async(); confirm for requests queued to a worker
+	 * without passing through the -EAGAIN path above.
+	 */
+	if (!io_wq_current_is_worker())
+		putname(req->open.filename);
+	if (ret < 0)
+		req_set_fail_links(req);
+	io_cqring_add_event(req, ret);
+	io_put_req_find_next(req, nxt);
+	return 0;
+}
+
 static int io_prep_sfr(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 	struct io_ring_ctx *ctx = req->ctx;
@@ -3160,6 +3275,9 @@ static int io_req_defer_prep(struct io_kiocb *req,
 	case IORING_OP_FALLOCATE:
 		ret = io_fallocate_prep(req, sqe);
 		break;
+	case IORING_OP_OPENAT:
+		ret = io_openat_prep(req, sqe);
+		break;
 	default:
 		printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
 				req->opcode);
@@ -3322,6 +3440,14 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 		}
 		ret = io_fallocate(req, nxt, force_nonblock);
 		break;
+	case IORING_OP_OPENAT:
+		if (sqe) {
+			ret = io_openat_prep(req, sqe);
+			if (ret)
+				break;
+		}
+		ret = io_openat(req, nxt, force_nonblock);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
@@ -3403,7 +3529,7 @@ static bool io_req_op_valid(int op)
 	return op >= IORING_OP_NOP && op < IORING_OP_LAST;
 }
 
-static int io_req_needs_file(struct io_kiocb *req)
+static int io_req_needs_file(struct io_kiocb *req, int fd)
 {
 	switch (req->opcode) {
 	case IORING_OP_NOP:
@@ -3413,6 +3539,9 @@ static int io_req_needs_file(struct io_kiocb *req)
 	case IORING_OP_ASYNC_CANCEL:
 	case IORING_OP_LINK_TIMEOUT:
 		return 0;
+	case IORING_OP_OPENAT:
+		/* AT_FDCWD is not a real descriptor, so there is no file to grab */
+		return fd != -1 && fd != AT_FDCWD;
 	default:
 		if (io_req_op_valid(req->opcode))
 			return 1;
@@ -3442,7 +3571,7 @@ static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req,
 	if (flags & IOSQE_IO_DRAIN)
 		req->flags |= REQ_F_IO_DRAIN;
 
-	ret = io_req_needs_file(req);
+	ret = io_req_needs_file(req, fd);
 	if (ret <= 0)
 		return ret;
 
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index bdbe2b130179..02af580754ce 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -34,6 +34,7 @@ struct io_uring_sqe {
 		__u32		timeout_flags;
 		__u32		accept_flags;
 		__u32		cancel_flags;
+		__u32		open_flags;
 	};
 	__u64	user_data;	/* data to be passed back at completion time */
 	union {
@@ -77,6 +78,7 @@ enum {
 	IORING_OP_LINK_TIMEOUT,
 	IORING_OP_CONNECT,
 	IORING_OP_FALLOCATE,
+	IORING_OP_OPENAT,
 
 	/* this goes last, obviously */
 	IORING_OP_LAST,
-- 
2.24.1