Re: [PATCH 2/6] io: define an interface for IO extensions

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



"Darrick J. Wong" <darrick.wong@xxxxxxxxxx> writes:

> Define a generic interface to allow userspace to attach metadata to an
> IO operation.  This interface will be used initially to implement
> protection information (PI) pass through, though it ought to be usable
> by anyone else desiring to extend the IO interface.  It should not be
> difficult to modify the non-AIO calls to use this mechanism.

My main issue with this patch is determining what exactly gets returned
to userspace when there is an issue in the teardown_extensions path.
It looks like you'll get the first error propagated from
io_teardown_extensions, others are ignored.  Then, in aio_complete, if
there was no error with the I/O, then you'll get the teardown error
reported in event->res, otherwise you'll get it in event->res2.  So,
what are the valid errors returned by the teardown routine for
extensions?  How is the userspace app supposed to determine where the
error came from, the I/O or a failure in the extension teardown?

I think it may make sense to only use res2 for reporting io extension
teardown failures.  Any new code that will use extensions can certainly
be written to check both res and res2, and this method would prevent the
ambiguity I mentioned.

Finally, I know this is an RFC, but please add some man-page changes to
your patch set, and CC linux-man.  Michael Kerrisk typically has
valuable advice on new APIs.

Cheers,
Jeff

>
> Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
> ---
>  fs/aio.c                     |  180 +++++++++++++++++++++++++++++++++++++++++-
>  include/linux/aio.h          |    7 ++
>  include/uapi/linux/aio_abi.h |   15 +++-
>  3 files changed, 197 insertions(+), 5 deletions(-)
>
>
> diff --git a/fs/aio.c b/fs/aio.c
> index 062a5f6..0c40bdc 100644
> --- a/fs/aio.c
> +++ b/fs/aio.c
> @@ -158,6 +158,11 @@ static struct vfsmount *aio_mnt;
>  static const struct file_operations aio_ring_fops;
>  static const struct address_space_operations aio_ctx_aops;
>  
> +static int io_teardown_extensions(struct kiocb *req);
> +static int io_setup_extensions(struct kiocb *req, int is_write,
> +			       struct io_extension __user *ioext);
> +static int iocb_setup_extensions(struct iocb *iocb, struct kiocb *req);
> +
>  static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages)
>  {
>  	struct qstr this = QSTR_INIT("[aio]", 5);
> @@ -916,6 +921,17 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
>  	struct io_event	*ev_page, *event;
>  	unsigned long	flags;
>  	unsigned tail, pos;
> +	int ret;
> +
> +	ret = io_teardown_extensions(iocb);
> +	if (ret) {
> +		if (!res)
> +			res = ret;
> +		else if (!res2)
> +			res2 = ret;
> +		else
> +			pr_err("error %d tearing down aio extensions\n", ret);
> +	}
>  
>  	/*
>  	 * Special case handling for sync iocbs:
> @@ -1350,15 +1366,167 @@ rw_common:
>  	return 0;
>  }
>  
> +/* IO extension code */
> +#define REQUIRED_STRUCTURE_SIZE(type, member)	\
> +	(offsetof(type, member) + sizeof(((type *)NULL)->member))
> +#define IO_EXT_SIZE(member) \
> +	REQUIRED_STRUCTURE_SIZE(struct io_extension, member)
> +
> +struct io_extension_type {
> +	unsigned int type;
> +	unsigned int extension_struct_size;
> +	int (*setup_fn)(struct kiocb *, int is_write);
> +	int (*destroy_fn)(struct kiocb *);
> +};
> +
> +static struct io_extension_type extensions[] = {
> +	{IO_EXT_INVALID, 0, NULL, NULL},
> +};
> +
> +static int is_write_iocb(struct iocb *iocb)
> +{
> +	switch (iocb->aio_lio_opcode) {
> +	case IOCB_CMD_PWRITE:
> +	case IOCB_CMD_PWRITEV:
> +		return 1;
> +	default:
> +		return 0;
> +	}
> +}
> +
> +static int io_teardown_extensions(struct kiocb *req)
> +{
> +	struct io_extension_type *iet;
> +	int ret, ret2;
> +
> +	if (req->ki_ioext == NULL)
> +		return 0;
> +
> +	/* Shut down all the extensions */
> +	ret = 0;
> +	for (iet = extensions; iet->type != IO_EXT_INVALID; iet++) {
> +		if (!(req->ki_ioext->ke_kern.ie_has & iet->type))
> +			continue;
> +		ret2 = iet->destroy_fn(req);
> +		if (ret2 && !ret)
> +			ret = ret2;
> +	}
> +
> +	/* Copy out return values */
> +	if (unlikely(copy_to_user(req->ki_ioext->ke_user,
> +				  &req->ki_ioext->ke_kern,
> +				  sizeof(struct io_extension)))) {
> +		if (!ret)
> +			ret = -EFAULT;
> +	}
> +
> +	kfree(req->ki_ioext);
> +	req->ki_ioext = NULL;
> +	return ret;
> +}
> +
> +static int io_check_bufsize(__u64 has, __u64 size)
> +{
> +	struct io_extension_type *iet;
> +	__u64 all_flags = 0;
> +
> +	for (iet = extensions; iet->type != IO_EXT_INVALID; iet++) {
> +		all_flags |= iet->type;
> +		if (!(has & iet->type))
> +			continue;
> +		if (iet->extension_struct_size > size)
> +			return -EINVAL;
> +	}
> +
> +	if (has & ~all_flags)
> +		return -EINVAL;
> +
> +	return 0;
> +}
> +
> +static int io_setup_extensions(struct kiocb *req, int is_write,
> +			       struct io_extension __user *ioext)
> +{
> +	struct io_extension_type *iet;
> +	__u64 sz, has;
> +	int ret;
> +
> +	/* Check size of buffer */
> +	if (unlikely(copy_from_user(&sz, &ioext->ie_size, sizeof(sz))))
> +		return -EFAULT;
> +	if (sz > PAGE_SIZE ||
> +	    sz > sizeof(struct io_extension) ||
> +	    sz < IO_EXT_SIZE(ie_has))
> +		return -EINVAL;
> +
> +	/* Check that the buffer's big enough */
> +	if (unlikely(copy_from_user(&has, &ioext->ie_has, sizeof(has))))
> +		return -EFAULT;
> +	ret = io_check_bufsize(has, sz);
> +	if (ret)
> +		return ret;
> +
> +	/* Copy from userland */
> +	req->ki_ioext = kzalloc(sizeof(struct kio_extension), GFP_NOIO);
> +	if (!req->ki_ioext)
> +		return -ENOMEM;
> +
> +	req->ki_ioext->ke_user = ioext;
> +	if (unlikely(copy_from_user(&req->ki_ioext->ke_kern, ioext, sz))) {
> +		ret = -EFAULT;
> +		goto out;
> +	}
> +
> +	/* Try to initialize all the extensions */
> +	has = 0;
> +	for (iet = extensions; iet->type != IO_EXT_INVALID; iet++) {
> +		if (!(req->ki_ioext->ke_kern.ie_has & iet->type))
> +			continue;
> +		ret = iet->setup_fn(req, is_write);
> +		if (ret) {
> +			req->ki_ioext->ke_kern.ie_has = has;
> +			goto out_destroy;
> +		}
> +		has |= iet->type;
> +	}
> +
> +	return 0;
> +out_destroy:
> +	io_teardown_extensions(req);
> +out:
> +	kfree(req->ki_ioext);
> +	req->ki_ioext = NULL;
> +	return ret;
> +}
> +
> +static int iocb_setup_extensions(struct iocb *iocb, struct kiocb *req)
> +{
> +	struct io_extension __user *user_ext;
> +
> +	if (!(iocb->aio_flags & IOCB_FLAG_EXTENSIONS))
> +		return 0;
> +
> +	user_ext = (struct io_extension __user *)iocb->aio_extension_ptr;
> +	return io_setup_extensions(req, is_write_iocb(iocb), user_ext);
> +}
> +
>  static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
>  			 struct iocb *iocb, bool compat)
>  {
>  	struct kiocb *req;
>  	ssize_t ret;
>  
> +	/* check for flags we don't know about */
> +	if (iocb->aio_flags & ~IOCB_FLAG_ALL) {
> +		pr_debug("EINVAL: invalid flags\n");
> +		return -EINVAL;
> +	}
> +
>  	/* enforce forwards compatibility on users */
> -	if (unlikely(iocb->aio_reserved1 || iocb->aio_reserved2)) {
> -		pr_debug("EINVAL: reserve field set\n");
> +	if (unlikely(iocb->aio_reserved1 ||
> +	    (!(iocb->aio_flags & IOCB_FLAG_EXTENSIONS) &&
> +	     iocb->aio_extension_ptr))) {
> +		pr_debug("EINVAL: reserved field set\n");
>  		return -EINVAL;
>  	}
>  
> @@ -1408,13 +1576,19 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
>  	req->ki_pos = iocb->aio_offset;
>  	req->ki_nbytes = iocb->aio_nbytes;
>  
> +	ret = iocb_setup_extensions(iocb, req);
> +	if (unlikely(ret))
> +		goto out_del_ext;
> +
>  	ret = aio_run_iocb(req, iocb->aio_lio_opcode,
>  			   (char __user *)(unsigned long)iocb->aio_buf,
>  			   compat);
>  	if (ret)
> -		goto out_put_req;
> +		goto out_del_ext;
>  
>  	return 0;
> +out_del_ext:
> +	io_teardown_extensions(req);
>  out_put_req:
>  	put_reqs_available(ctx, 1);
>  	percpu_ref_put(&ctx->reqs);
> diff --git a/include/linux/aio.h b/include/linux/aio.h
> index d9c92da..60f4364 100644
> --- a/include/linux/aio.h
> +++ b/include/linux/aio.h
> @@ -29,6 +29,10 @@ struct kiocb;
>  
>  typedef int (kiocb_cancel_fn)(struct kiocb *);
>  
> +struct kio_extension {
> +	struct io_extension __user *ke_user;
> +	struct io_extension ke_kern;
> +};
>  struct kiocb {
>  	struct file		*ki_filp;
>  	struct kioctx		*ki_ctx;	/* NULL for sync ops */
> @@ -52,6 +56,9 @@ struct kiocb {
>  	 * this is the underlying eventfd context to deliver events to.
>  	 */
>  	struct eventfd_ctx	*ki_eventfd;
> +
> +	/* Kernel copy of extension descriptors */
> +	struct kio_extension	*ki_ioext;
>  };
>  
>  static inline bool is_sync_kiocb(struct kiocb *kiocb)
> diff --git a/include/uapi/linux/aio_abi.h b/include/uapi/linux/aio_abi.h
> index bb2554f..07ffd1f 100644
> --- a/include/uapi/linux/aio_abi.h
> +++ b/include/uapi/linux/aio_abi.h
> @@ -53,6 +53,8 @@ enum {
>   *                   is valid.
>   */
>  #define IOCB_FLAG_RESFD		(1 << 0)
> +#define IOCB_FLAG_EXTENSIONS	(1 << 1)
> +#define IOCB_FLAG_ALL		(IOCB_FLAG_RESFD | IOCB_FLAG_EXTENSIONS)
>  
>  /* read() from /dev/aio returns these structures. */
>  struct io_event {
> @@ -70,6 +72,15 @@ struct io_event {
>  #error edit for your odd byteorder.
>  #endif
>  
> +/* IO extension types */
> +#define IO_EXT_INVALID	(0)
> +
> +/* IO extension descriptor */
> +struct io_extension {
> +	__u64 ie_size;
> +	__u64 ie_has;
> +};
> +
>  /*
>   * we always use a 64bit off_t when communicating
>   * with userland.  its up to libraries to do the
> @@ -91,8 +102,8 @@ struct iocb {
>  	__u64	aio_nbytes;
>  	__s64	aio_offset;
>  
> -	/* extra parameters */
> -	__u64	aio_reserved2;	/* TODO: use this for a (struct sigevent *) */
> +	/* aio extensions, only present if IOCB_FLAG_EXTENSIONS */
> +	__u64	aio_extension_ptr;
>  
>  	/* flags for the "struct iocb" */
>  	__u32	aio_flags;
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@xxxxxxxxx.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@xxxxxxxxx";> email@xxxxxxxxx </a>




[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux]     [Linux OMAP]     [Linux MIPS]     [ECOS]     [Asterisk Internet PBX]     [Linux API]