Re: [PATCH 6/8] io_uring: support buffer registration updates

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 12/11/2020 23:00, Bijan Mottahedeh wrote:
> Introduce IORING_REGISTER_BUFFERS_UPDATE and IORING_OP_BUFFERS_UPDATE,
> consistent with file registration update.

I'd prefer not to add a new opcode for each new resource. Can we have
only IORING_OP_RESOURCE_UPDATE and multiplex inside it? Even better if you
could fit everything into IORING_OP_FILES_UPDATE and then

#define IORING_OP_RESOURCE_UPDATE IORING_OP_FILES_UPDATE

Jens, what do you think?

> 
> Signed-off-by: Bijan Mottahedeh <bijan.mottahedeh@xxxxxxxxxx>
> ---
>  fs/io_uring.c                 | 139 +++++++++++++++++++++++++++++++++++++++++-
>  include/uapi/linux/io_uring.h |   8 +--
>  2 files changed, 140 insertions(+), 7 deletions(-)
> 
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 71f6d5c..6020fd2 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -1006,6 +1006,9 @@ struct io_op_def {
>  		.work_flags		= IO_WQ_WORK_MM | IO_WQ_WORK_FILES |
>  						IO_WQ_WORK_FS | IO_WQ_WORK_BLKCG,
>  	},
> +	[IORING_OP_BUFFERS_UPDATE] = {
> +		.work_flags		= IO_WQ_WORK_MM,
> +	},
>  };
>  
>  enum io_mem_account {
> @@ -1025,6 +1028,9 @@ static void __io_complete_rw(struct io_kiocb *req, long res, long res2,
>  static int __io_sqe_files_update(struct io_ring_ctx *ctx,
>  				 struct io_uring_rsrc_update *ip,
>  				 unsigned nr_args);
> +static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
> +				   struct io_uring_rsrc_update *up,
> +				   unsigned nr_args);
>  static void __io_clean_op(struct io_kiocb *req);
>  static struct file *io_file_get(struct io_submit_state *state,
>  				struct io_kiocb *req, int fd, bool fixed);
> @@ -5939,6 +5945,19 @@ static void destroy_fixed_rsrc_ref_node(struct fixed_rsrc_ref_node *ref_node)
>  	percpu_ref_exit(&ref_node->refs);
>  	kfree(ref_node);
>  }
> +
> +static int io_buffers_update_prep(struct io_kiocb *req,
> +				  const struct io_uring_sqe *sqe)
> +{
> +	return io_rsrc_update_prep(req, sqe);
> +}
> +
> +static int io_buffers_update(struct io_kiocb *req, bool force_nonblock,
> +			     struct io_comp_state *cs)
> +{
> +	return io_rsrc_update(req, force_nonblock, cs, __io_sqe_buffers_update);
> +}
> +
>  static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
>  {
>  	switch (req->opcode) {
> @@ -6010,11 +6029,13 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
>  		return io_renameat_prep(req, sqe);
>  	case IORING_OP_UNLINKAT:
>  		return io_unlinkat_prep(req, sqe);
> +	case IORING_OP_BUFFERS_UPDATE:
> +		return io_buffers_update_prep(req, sqe);
>  	}
>  
>  	printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
>  			req->opcode);
> -	return-EINVAL;
> +	return -EINVAL;
>  }
>  
>  static int io_req_defer_prep(struct io_kiocb *req,
> @@ -6268,6 +6289,9 @@ static int io_issue_sqe(struct io_kiocb *req, bool force_nonblock,
>  	case IORING_OP_UNLINKAT:
>  		ret = io_unlinkat(req, force_nonblock);
>  		break;
> +	case IORING_OP_BUFFERS_UPDATE:
> +		ret = io_buffers_update(req, force_nonblock, cs);
> +		break;
>  	default:
>  		ret = -EINVAL;
>  		break;
> @@ -8224,6 +8248,7 @@ static void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_mapped_ubuf *imu)
>  	if (imu->acct_pages)
>  		io_unaccount_mem(ctx, imu->nr_bvecs, ACCT_PINNED);
>  	kvfree(imu->bvec);
> +	imu->bvec = NULL;
>  	imu->nr_bvecs = 0;
>  }
>  
> @@ -8441,6 +8466,7 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
>  		if (pret > 0)
>  			unpin_user_pages(pages, pret);
>  		kvfree(imu->bvec);
> +		imu->bvec = NULL;
>  		goto done;
>  	}
>  
> @@ -8602,6 +8628,8 @@ static void io_buf_data_ref_zero(struct percpu_ref *ref)
>  static void io_ring_buf_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc)
>  {
>  	io_buffer_unmap(ctx, prsrc->buf);
> +	kvfree(prsrc->buf);
> +	prsrc->buf = NULL;
>  }
>  
>  static struct fixed_rsrc_ref_node *alloc_fixed_buf_ref_node(
> @@ -8684,6 +8712,111 @@ static int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
>  	return 0;
>  }
>  
> +static inline int io_queue_buffer_removal(struct fixed_rsrc_data *data,
> +					  struct io_mapped_ubuf *imu)
> +{
> +	return io_queue_rsrc_removal(data, (void *)imu);
> +}
> +
> +static void destroy_fixed_buf_ref_node(struct fixed_rsrc_ref_node *ref_node)
> +{
> +	destroy_fixed_rsrc_ref_node(ref_node);
> +}
> +
> +static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
> +				   struct io_uring_rsrc_update *up,
> +				   unsigned nr_args)
> +{
> +	struct fixed_rsrc_data *data = ctx->buf_data;
> +	struct fixed_rsrc_ref_node *ref_node;
> +	struct io_mapped_ubuf *imu;
> +	struct iovec iov;
> +	struct iovec __user *iovs;
> +	struct page *last_hpage = NULL;
> +	__u32 done;
> +	int i, err;
> +	bool needs_switch = false;
> +
> +	if (check_add_overflow(up->offset, nr_args, &done))
> +		return -EOVERFLOW;
> +	if (done > ctx->nr_user_bufs)
> +		return -EINVAL;
> +
> +	ref_node = alloc_fixed_buf_ref_node(ctx);
> +	if (IS_ERR(ref_node))
> +		return PTR_ERR(ref_node);
> +
> +	done = 0;
> +	iovs = u64_to_user_ptr(up->iovs);
> +	while (nr_args) {
> +		struct fixed_rsrc_table *table;
> +		unsigned index;
> +
> +		err = 0;
> +		if (copy_from_user(&iov, &iovs[done], sizeof(iov))) {
> +			err = -EFAULT;
> +			break;
> +		}
> +		i = array_index_nospec(up->offset, ctx->nr_user_bufs);
> +		table = &ctx->buf_data->table[i >> IORING_BUF_TABLE_SHIFT];
> +		index = i & IORING_BUF_TABLE_MASK;
> +		imu = &table->bufs[index];
> +		if (table->bufs[index].ubuf) {
> +			struct io_mapped_ubuf *dup;
> +			dup = kmemdup(imu, sizeof(*imu), GFP_KERNEL);
> +			if (!dup) {
> +				err = -ENOMEM;
> +				break;
> +			}
> +			err = io_queue_buffer_removal(data, dup);
> +			if (err)
> +				break;
> +			memset(imu, 0, sizeof(*imu));
> +			needs_switch = true;
> +		}
> +		if (!io_buffer_validate(&iov)) {
> +			err = io_sqe_buffer_register(ctx, &iov, imu,
> +						     &last_hpage);
> +			if (err) {
> +				memset(imu, 0, sizeof(*imu));
> +				break;
> +			}
> +		}
> +		nr_args--;
> +		done++;
> +		up->offset++;
> +	}
> +
> +	if (needs_switch) {
> +		percpu_ref_kill(&data->node->refs);
> +		spin_lock(&data->lock);
> +		list_add(&ref_node->node, &data->ref_list);
> +		data->node = ref_node;
> +		spin_unlock(&data->lock);
> +		percpu_ref_get(&ctx->buf_data->refs);
> +	} else
> +		destroy_fixed_buf_ref_node(ref_node);
> +
> +	return done ? done : err;
> +}
> +
> +static int io_sqe_buffers_update(struct io_ring_ctx *ctx, void __user *arg,
> +				 unsigned nr_args)
> +{
> +	struct io_uring_rsrc_update up;
> +
> +	if (!ctx->buf_data)
> +		return -ENXIO;
> +	if (!nr_args)
> +		return -EINVAL;
> +	if (copy_from_user(&up, arg, sizeof(up)))
> +		return -EFAULT;
> +	if (up.resv)
> +		return -EINVAL;
> +
> +	return __io_sqe_buffers_update(ctx, &up, nr_args);
> +}
> +
>  static int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg)
>  {
>  	__s32 __user *fds = arg;
> @@ -9961,6 +10094,7 @@ static bool io_register_op_must_quiesce(int op)
>  	switch (op) {
>  	case IORING_UNREGISTER_FILES:
>  	case IORING_REGISTER_FILES_UPDATE:
> +	case IORING_REGISTER_BUFFERS_UPDATE:
>  	case IORING_REGISTER_PROBE:
>  	case IORING_REGISTER_PERSONALITY:
>  	case IORING_UNREGISTER_PERSONALITY:
> @@ -10036,6 +10170,9 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
>  			break;
>  		ret = io_sqe_buffers_unregister(ctx);
>  		break;
> +	case IORING_REGISTER_BUFFERS_UPDATE:
> +		ret = io_sqe_buffers_update(ctx, arg, nr_args);
> +		break;
>  	case IORING_REGISTER_FILES:
>  		ret = io_sqe_files_register(ctx, arg, nr_args);
>  		break;
> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
> index 87f0f56..17682b5 100644
> --- a/include/uapi/linux/io_uring.h
> +++ b/include/uapi/linux/io_uring.h
> @@ -137,6 +137,7 @@ enum {
>  	IORING_OP_SHUTDOWN,
>  	IORING_OP_RENAMEAT,
>  	IORING_OP_UNLINKAT,
> +	IORING_OP_BUFFERS_UPDATE,
>  
>  	/* this goes last, obviously */
>  	IORING_OP_LAST,
> @@ -279,17 +280,12 @@ enum {
>  	IORING_UNREGISTER_PERSONALITY		= 10,
>  	IORING_REGISTER_RESTRICTIONS		= 11,
>  	IORING_REGISTER_ENABLE_RINGS		= 12,
> +	IORING_REGISTER_BUFFERS_UPDATE		= 13,
>  
>  	/* this goes last */
>  	IORING_REGISTER_LAST
>  };
>  
> -struct io_uring_files_update {
> -	__u32 offset;
> -	__u32 resv;
> -	__aligned_u64 /* __s32 * */ fds;
> -};
> -
>  struct io_uring_rsrc_update {
>  	__u32 offset;
>  	__u32 resv;
> 

-- 
Pavel Begunkov



[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux