Re: [PATCH 3/3] io_uring: add IORING_OP_MADVISE

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 10/01/2020 18:47, Jens Axboe wrote:
> This adds support for doing madvise(2) through io_uring. We assume that
> any operation can block, and hence punt everything async. This could be
> improved, but hard to make bullet proof. The async punt ensures it's
> safe.
> 
I don't like that it shares struct/field names with fadvise. E.g. madvise's
context is called struct io_fadvise. Could it at least have a fadvise_advice field
in struct io_uring_sqe? The io_uring parts of the patchset look good.

Reviewed-by: Pavel Begunkov <asml.silence@xxxxxxxxx>

> Signed-off-by: Jens Axboe <axboe@xxxxxxxxx>
> ---
>  fs/io_uring.c                 | 56 ++++++++++++++++++++++++++++++++++-
>  include/uapi/linux/io_uring.h |  1 +
>  2 files changed, 56 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 0b200a7d4ae0..378f97cc2bf2 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -403,7 +403,10 @@ struct io_files_update {
>  
>  struct io_fadvise {
>  	struct file			*file;
> -	u64				offset;
> +	union {
> +		u64			offset;
> +		u64			addr;
> +	};
>  	u32				len;
>  	u32				advice;
>  };
> @@ -682,6 +685,10 @@ static const struct io_op_def io_op_defs[] = {
>  		/* IORING_OP_FADVISE */
>  		.needs_file		= 1,
>  	},
> +	{
> +		/* IORING_OP_MADVISE */
> +		.needs_mm		= 1,
> +	},
>  };
>  
>  static void io_wq_submit_work(struct io_wq_work **workptr);
> @@ -2448,6 +2455,42 @@ static int io_openat(struct io_kiocb *req, struct io_kiocb **nxt,
>  	return 0;
>  }
>  
> +static int io_madvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
> +{
> +#if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU)
> +	if (sqe->ioprio || sqe->buf_index || sqe->off)
> +		return -EINVAL;
> +
> +	req->fadvise.addr = READ_ONCE(sqe->addr);
> +	req->fadvise.len = READ_ONCE(sqe->len);
> +	req->fadvise.advice = READ_ONCE(sqe->fadvise_advice);
> +	return 0;
> +#else
> +	return -EOPNOTSUPP;
> +#endif
> +}
> +
> +static int io_madvise(struct io_kiocb *req, struct io_kiocb **nxt,
> +		      bool force_nonblock)
> +{
> +#if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU)
> +	struct io_fadvise *fa = &req->fadvise;
> +	int ret;
> +
> +	if (force_nonblock)
> +		return -EAGAIN;
> +
> +	ret = do_madvise(fa->addr, fa->len, fa->advice);
> +	if (ret < 0)
> +		req_set_fail_links(req);
> +	io_cqring_add_event(req, ret);
> +	io_put_req_find_next(req, nxt);
> +	return 0;
> +#else
> +	return -EOPNOTSUPP;
> +#endif
> +}
> +
>  static int io_fadvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
>  {
>  	if (sqe->ioprio || sqe->buf_index || sqe->addr)
> @@ -3769,6 +3812,9 @@ static int io_req_defer_prep(struct io_kiocb *req,
>  	case IORING_OP_FADVISE:
>  		ret = io_fadvise_prep(req, sqe);
>  		break;
> +	case IORING_OP_MADVISE:
> +		ret = io_madvise_prep(req, sqe);
> +		break;
>  	default:
>  		printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
>  				req->opcode);
> @@ -3973,6 +4019,14 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
>  		}
>  		ret = io_fadvise(req, nxt, force_nonblock);
>  		break;
> +	case IORING_OP_MADVISE:
> +		if (sqe) {
> +			ret = io_madvise_prep(req, sqe);
> +			if (ret)
> +				break;
> +		}
> +		ret = io_madvise(req, nxt, force_nonblock);
> +		break;
>  	default:
>  		ret = -EINVAL;
>  		break;
> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
> index f87d8fb42916..7cb6fe0fccd7 100644
> --- a/include/uapi/linux/io_uring.h
> +++ b/include/uapi/linux/io_uring.h
> @@ -88,6 +88,7 @@ enum {
>  	IORING_OP_READ,
>  	IORING_OP_WRITE,
>  	IORING_OP_FADVISE,
> +	IORING_OP_MADVISE,
>  
>  	/* this goes last, obviously */
>  	IORING_OP_LAST,
> 

-- 
Pavel Begunkov

Attachment: signature.asc
Description: OpenPGP digital signature


[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux