From: Árni Dagur <arni@xxxxxxxx>

* The `sqe->splice_flags` field is used to hold flags.
* We return -EAGAIN if force_nonblock is set.

Signed-off-by: Árni Dagur <arni@xxxxxxxx>
---
 fs/io_uring.c                 | 89 +++++++++++++++++++++++++++++++++++
 include/uapi/linux/io_uring.h |  1 +
 2 files changed, 90 insertions(+)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index ca46f314640b..a99a89798386 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -531,6 +531,13 @@ struct io_splice {
 	unsigned int			flags;
 };
 
+struct io_vmsplice {
+	struct file			*file;
+	u64				addr;
+	u64				len;
+	unsigned int			flags;
+};
+
 struct io_provide_buf {
 	struct file			*file;
 	__u64				addr;
@@ -692,6 +699,7 @@ struct io_kiocb {
 		struct io_madvise	madvise;
 		struct io_epoll		epoll;
 		struct io_splice	splice;
+		struct io_vmsplice	vmsplice;
 		struct io_provide_buf	pbuf;
 		struct io_statx		statx;
 		struct io_shutdown	shutdown;
@@ -967,6 +975,12 @@ static const struct io_op_def io_op_defs[] = {
 		.unbound_nonreg_file	= 1,
 		.work_flags		= IO_WQ_WORK_BLKCG,
 	},
+	[IORING_OP_VMSPLICE] = {
+		.needs_file		= 1,
+		.hash_reg_file		= 1,
+		.unbound_nonreg_file	= 1,
+		.work_flags		= IO_WQ_WORK_MM,
+	},
 	[IORING_OP_PROVIDE_BUFFERS] = {},
 	[IORING_OP_REMOVE_BUFFERS] = {},
 	[IORING_OP_TEE] = {
@@ -3884,6 +3898,76 @@ static int io_splice(struct io_kiocb *req, bool force_nonblock)
 	return 0;
 }
 
+/*
+ * Prepare an IORING_OP_VMSPLICE request: read the iovec array address, the
+ * segment count and the splice flags from the SQE. IOPOLL rings, a non-zero
+ * sqe->off and flags outside SPLICE_F_ALL are rejected with -EINVAL.
+ */
+static int io_vmsplice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+	struct io_vmsplice *sp = &req->vmsplice;
+
+	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+		return -EINVAL;
+	if (unlikely(READ_ONCE(sqe->off)))
+		return -EINVAL;
+
+	sp->addr = READ_ONCE(sqe->addr);
+	sp->len = READ_ONCE(sqe->len);
+	sp->flags = READ_ONCE(sqe->splice_flags);
+
+	if (sp->flags & ~SPLICE_F_ALL)
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Issue a vmsplice on the request's file. Punts with -EAGAIN whenever
+ * force_nonblock is set. Otherwise the do_vmsplice() result (or a negative
+ * errno) is posted as the completion, and req_set_fail_links() is called
+ * on error or short transfer.
+ */
+static int io_vmsplice(struct io_kiocb *req, bool force_nonblock)
+{
+	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
+	struct io_vmsplice *sp = &req->vmsplice;
+	void __user *buf = u64_to_user_ptr(sp->addr);
+	struct iov_iter __iter, *iter = &__iter;
+	struct file *file = sp->file;
+	ssize_t io_size;
+	int type, ret;
+
+	if (force_nonblock)
+		return -EAGAIN;
+
+	if (file->f_mode & FMODE_WRITE)
+		type = WRITE;
+	else if (file->f_mode & FMODE_READ)
+		type = READ;
+	else {
+		/* Nothing was imported; don't kfree() the on-stack array. */
+		iovec = NULL;
+		ret = -EBADF;
+		goto err;
+	}
+
+	ret = __import_iovec(type, buf, sp->len, UIO_FASTIOV, &iovec, iter,
+			     req->ctx->compat);
+	if (ret < 0)
+		goto err;
+	io_size = iov_iter_count(iter);
+
+	ret = do_vmsplice(file, iter, sp->flags);
+	if (ret != io_size) {
+err:
+		req_set_fail_links(req);
+	}
+	io_req_complete(req, ret);
+	kfree(iovec);
+	return 0;
+}
+
 /*
  * IORING_OP_NOP just posts a completion event, nothing else.
  */
@@ -6009,6 +6093,8 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		return io_epoll_ctl_prep(req, sqe);
 	case IORING_OP_SPLICE:
 		return io_splice_prep(req, sqe);
+	case IORING_OP_VMSPLICE:
+		return io_vmsplice_prep(req, sqe);
 	case IORING_OP_PROVIDE_BUFFERS:
 		return io_provide_buffers_prep(req, sqe);
 	case IORING_OP_REMOVE_BUFFERS:
@@ -6262,6 +6348,9 @@ static int io_issue_sqe(struct io_kiocb *req, bool force_nonblock,
 	case IORING_OP_SPLICE:
 		ret = io_splice(req, force_nonblock);
 		break;
+	case IORING_OP_VMSPLICE:
+		ret = io_vmsplice(req, force_nonblock);
+		break;
 	case IORING_OP_PROVIDE_BUFFERS:
 		ret = io_provide_buffers(req, force_nonblock, cs);
 		break;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index d31a2a1e8ef9..6bc79f9bb123 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -137,6 +137,7 @@ enum {
 	IORING_OP_SHUTDOWN,
 	IORING_OP_RENAMEAT,
 	IORING_OP_UNLINKAT,
+	IORING_OP_VMSPLICE,
 
 	/* this goes last, obviously */
 	IORING_OP_LAST,
-- 
2.30.0