Re: [PATCH 2/2] io_uring: Add vmsplice support

On 05/01/2021 23:00, arni@xxxxxxxx wrote:
> From: Árni Dagur <arni@xxxxxxxx>
> 
> * The `sqe->splice_flags` field is used to hold flags.
> * We return -EAGAIN if force_nonblock is set.
> 
> Signed-off-by: Árni Dagur <arni@xxxxxxxx>
> ---
>  fs/io_uring.c                 | 76 +++++++++++++++++++++++++++++++++++
>  include/uapi/linux/io_uring.h |  1 +
>  2 files changed, 77 insertions(+)
> 
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index ca46f314640b..a99a89798386 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -531,6 +531,13 @@ struct io_splice {
>  	unsigned int			flags;
>  };
>  
> +struct io_vmsplice {
> +	struct file			*file;
> +	u64				addr;
> +	u64				len;
> +	unsigned int			flags;
> +};
> +
>  struct io_provide_buf {
>  	struct file			*file;
>  	__u64				addr;
> @@ -692,6 +699,7 @@ struct io_kiocb {
>  		struct io_madvise	madvise;
>  		struct io_epoll		epoll;
>  		struct io_splice	splice;
> +		struct io_vmsplice	vmsplice;
>  		struct io_provide_buf	pbuf;
>  		struct io_statx		statx;
>  		struct io_shutdown	shutdown;
> @@ -967,6 +975,12 @@ static const struct io_op_def io_op_defs[] = {
>  		.unbound_nonreg_file	= 1,
>  		.work_flags		= IO_WQ_WORK_BLKCG,
>  	},
> +	[IORING_OP_VMSPLICE] = {
> +		.needs_file		= 1,
> +		.hash_reg_file		= 1,
> +		.unbound_nonreg_file	= 1,
> +		.work_flags		= IO_WQ_WORK_MM,
> +	},
>  	[IORING_OP_PROVIDE_BUFFERS] = {},
>  	[IORING_OP_REMOVE_BUFFERS] = {},
>  	[IORING_OP_TEE] = {
> @@ -3884,6 +3898,63 @@ static int io_splice(struct io_kiocb *req, bool force_nonblock)
>  	return 0;
>  }
>  
> +static int io_vmsplice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
> +{
> +	struct io_vmsplice *sp = &req->vmsplice;
> +
> +	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
> +		return -EINVAL;
> +	if (unlikely(READ_ONCE(sqe->off)))
> +		return -EINVAL;
> +
> +	sp->addr = READ_ONCE(sqe->addr);
> +	sp->len = READ_ONCE(sqe->len);
> +	sp->flags = READ_ONCE(sqe->splice_flags);
> +
> +	if (sp->flags & ~SPLICE_F_ALL)
> +		return -EINVAL;
> +
> +	return 0;
> +}
> +
> +static int io_vmsplice(struct io_kiocb *req, bool force_nonblock)
> +{
> +	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
> +	struct io_vmsplice *sp = &req->vmsplice;
> +	void __user *buf = u64_to_user_ptr(sp->addr);

This is really a pointer to the user's iovec array, so type it as such:
const struct iovec __user *uiov;

> +	struct iov_iter __iter, *iter = &__iter;

read/write either use ((struct io_async_rw *)req->async_data)->iter,
or an on-stack iter to avoid the allocation. This only ever uses the
on-stack __iter, so why do you need the extra *iter pointer?
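
IOW, just use it directly, e.g. (sketch):

	struct iov_iter iter;
	...
	ret = __import_iovec(type, uiov, sp->len, UIO_FASTIOV, &iovec,
				&iter, req->ctx->compat);
	...
	io_size = iov_iter_count(&iter);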

> +	struct file *file = sp->file;
> +	ssize_t io_size;
> +	int type, ret;
> +
> +	if (force_nonblock)
> +		return -EAGAIN;
> +
> +	if (file->f_mode & FMODE_WRITE)
> +		type = WRITE;
> +	else if (file->f_mode & FMODE_READ)
> +		type = READ;
> +	else {
> +		ret = -EBADF;
> +		goto err;

it jumps to kfree(iovec) while iovec still points at the on-stack
inline_vecs, i.e. kfree() of stack memory.
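
E.g. complete the request inline instead of jumping into the iovec
cleanup path (untested sketch):

	if (file->f_mode & FMODE_WRITE)
		type = WRITE;
	else if (file->f_mode & FMODE_READ)
		type = READ;
	else {
		req_set_fail_links(req);
		io_req_complete(req, -EBADF);
		return 0;
	}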

> +	}
> +
> +	ret = __import_iovec(type, buf, sp->len, UIO_FASTIOV, &iovec, iter,
> +				req->ctx->compat);

This may happen asynchronously, long after io_uring_enter(submit) has
returned, e.g. if the user kept uiov on-stack, this will fail or read
garbage.

So either make it a part of the ABI -- users must keep uiov alive
until the request completes -- or copy the iovec while we're still in
the submitter's context (i.e. not yet async). For consistency with
read/write I'd prefer the second.
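
Roughly what read/write do via async_data, e.g. (untested sketch; the
io_async_vmsplice struct and its fields are made up, and the op def
would also need .needs_async_data / .async_size set accordingly):

	struct io_async_vmsplice {
		struct iov_iter		iter;
		struct iovec		*free_iovec;
		struct iovec		fast_iov[UIO_FASTIOV];
	};

	static int io_vmsplice_prep(struct io_kiocb *req,
				    const struct io_uring_sqe *sqe)
	{
		struct io_async_vmsplice *av = req->async_data;
		struct io_vmsplice *sp = &req->vmsplice;
		struct iovec *iov = av->fast_iov;
		const struct iovec __user *uiov;
		int type;
		ssize_t ret;

		...

		/* direction from the file mode, as io_vmsplice() does */
		type = (req->file->f_mode & FMODE_WRITE) ? WRITE : READ;

		/* still in the submitter's mm, so copy the iovec now;
		 * the user may reuse/free uiov right after submission */
		uiov = u64_to_user_ptr(READ_ONCE(sqe->addr));
		ret = __import_iovec(type, uiov, sp->len, UIO_FASTIOV,
					&iov, &av->iter, req->ctx->compat);
		if (ret < 0)
			return ret;
		/* iov is NULL here if fast_iov sufficed */
		av->free_iovec = iov;
		return 0;
	}

io_vmsplice() would then just use &av->iter and kfree(av->free_iovec)
on completion.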

> +	if (ret < 0)
> +		goto err;
> +	io_size = iov_iter_count(iter);
> +
> +	ret = do_vmsplice(file, iter, sp->flags);
> +	if (ret != io_size) {
> +err:
> +		req_set_fail_links(req);
> +	}
> +	io_req_complete(req, ret);
> +	kfree(iovec);
> +	return 0;
> +}
> +
>  /*
>   * IORING_OP_NOP just posts a completion event, nothing else.
>   */
> @@ -6009,6 +6080,8 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
>  		return io_epoll_ctl_prep(req, sqe);
>  	case IORING_OP_SPLICE:
>  		return io_splice_prep(req, sqe);
> +	case IORING_OP_VMSPLICE:
> +		return io_vmsplice_prep(req, sqe);
>  	case IORING_OP_PROVIDE_BUFFERS:
>  		return io_provide_buffers_prep(req, sqe);
>  	case IORING_OP_REMOVE_BUFFERS:
> @@ -6262,6 +6335,9 @@ static int io_issue_sqe(struct io_kiocb *req, bool force_nonblock,
>  	case IORING_OP_SPLICE:
>  		ret = io_splice(req, force_nonblock);
>  		break;
> +	case IORING_OP_VMSPLICE:
> +		ret = io_vmsplice(req, force_nonblock);
> +		break;
>  	case IORING_OP_PROVIDE_BUFFERS:
>  		ret = io_provide_buffers(req, force_nonblock, cs);
>  		break;
> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
> index d31a2a1e8ef9..6bc79f9bb123 100644
> --- a/include/uapi/linux/io_uring.h
> +++ b/include/uapi/linux/io_uring.h
> @@ -137,6 +137,7 @@ enum {
>  	IORING_OP_SHUTDOWN,
>  	IORING_OP_RENAMEAT,
>  	IORING_OP_UNLINKAT,
> +	IORING_OP_VMSPLICE,
>  
>  	/* this goes last, obviously */
>  	IORING_OP_LAST,
> 

-- 
Pavel Begunkov


