Re: [PATCH v3 08/13] io_uring: implement fixed buffers registration similar to fixed files

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 18/12/2020 18:07, Bijan Mottahedeh wrote:
> Apply fixed_rsrc functionality for fixed buffers support.

git generated a pretty messy diff...

Because it's do quiesce, fixed read/write access buffers from asynchronous
contexts without synchronisation. That won't work anymore, so

1. either we save it in advance, that would require extra req_async
allocation for linked fixed rw

2. or synchronise whenever async. But that would mean that a request
may get and do IO on two different buffers, that's rotten.

3. do mixed -- lazy, but if do IO then alloc.

3.5 also "synchronise" there would mean uring_lock, that's not welcome,
but we can probably do rcu.

Let me think of a patch...

[...]
> @@ -8373,7 +8433,13 @@ static bool headpage_already_acct(struct io_ring_ctx *ctx, struct page **pages,
>  
>  	/* check previously registered pages */
>  	for (i = 0; i < ctx->nr_user_bufs; i++) {
> -		struct io_mapped_ubuf *imu = &ctx->user_bufs[i];
> +		struct fixed_rsrc_table *table;
> +		struct io_mapped_ubuf *imu;
> +		unsigned int index;
> +
> +		table = &ctx->buf_data->table[i >> IORING_BUF_TABLE_SHIFT];
> +		index = i & IORING_BUF_TABLE_MASK;
> +		imu = &table->bufs[index];

io_buf_from_index() may tak buf_data, so can be reused.

>  
>  		for (j = 0; j < imu->nr_bvecs; j++) {
>  			if (!PageCompound(imu->bvec[j].bv_page))
> @@ -8508,19 +8574,79 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
>  	return ret;
>  }
>  
> -static int io_buffers_map_alloc(struct io_ring_ctx *ctx, unsigned int nr_args)
> +static void io_free_buf_tables(struct fixed_rsrc_data *buf_data,
> +			       unsigned int nr_tables)
>  {
> -	if (ctx->user_bufs)
> -		return -EBUSY;
> -	if (!nr_args || nr_args > UIO_MAXIOV)
> -		return -EINVAL;
> +	int i;
>  
> -	ctx->user_bufs = kcalloc(nr_args, sizeof(struct io_mapped_ubuf),
> -					GFP_KERNEL);
> -	if (!ctx->user_bufs)
> -		return -ENOMEM;
> +	for (i = 0; i < nr_tables; i++) {
> +		struct fixed_rsrc_table *table = &buf_data->table[i];
>  
> -	return 0;
> +		kfree(table->bufs);
> +	}
> +}
> +
> +static int io_alloc_buf_tables(struct fixed_rsrc_data *buf_data,
> +			       unsigned int nr_tables, unsigned int nr_bufs)
> +{
> +	int i;
> +		

trailing tabs

> +	for (i = 0; i < nr_tables; i++) {
> +		struct fixed_rsrc_table *table = &buf_data->table[i];
> +		unsigned int this_bufs;
> +
> +		this_bufs = min(nr_bufs, IORING_MAX_BUFS_TABLE);
> +		table->bufs = kcalloc(this_bufs, sizeof(struct io_mapped_ubuf),
> +				      GFP_KERNEL);
> +		if (!table->bufs)
> +			break;
> +		nr_bufs -= this_bufs;
> +	}
> +
> +	if (i == nr_tables)
> +		return 0;
> +
> +	io_free_buf_tables(buf_data, nr_tables);

Would work because kcalloc() zeroed buf_data->table, but

io_free_buf_tables(buf_data, __i__);

> +	return 1;
> +}
> +
[...]
>  static int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
>  				   unsigned int nr_args)
>  {
>  	int i, ret;
>  	struct iovec iov;
>  	struct page *last_hpage = NULL;
> +	struct fixed_rsrc_ref_node *ref_node;
> +	struct fixed_rsrc_data *buf_data;
>  
> -	ret = io_buffers_map_alloc(ctx, nr_args);
> -	if (ret)
> -		return ret;
> +	if (ctx->nr_user_bufs)
> +		return -EBUSY;
>  
> -	for (i = 0; i < nr_args; i++) {
> -		struct io_mapped_ubuf *imu = &ctx->user_bufs[i];
> +	buf_data = io_buffers_map_alloc(ctx, nr_args);
> +	if (IS_ERR(buf_data))
> +		return PTR_ERR(buf_data);
> +
> +	for (i = 0; i < nr_args; i++, ctx->nr_user_bufs++) {
> +		struct fixed_rsrc_table *table;
> +		struct io_mapped_ubuf *imu;
> +		unsigned int index;
>  
>  		ret = io_copy_iov(ctx, &iov, arg, i);
>  		if (ret)
>  			break;
>  
> +		/* allow sparse sets */
> +		if (!iov.iov_base && !iov.iov_len)
> +			continue;
> +
>  		ret = io_buffer_validate(&iov);
>  		if (ret)
>  			break;
>  
> +		table = &buf_data->table[i >> IORING_BUF_TABLE_SHIFT];

same, io_buf_from_index() can be reused

> +		index = i & IORING_BUF_TABLE_MASK;
> +		imu = &table->bufs[index];
> +
>  		ret = io_sqe_buffer_register(ctx, &iov, imu, &last_hpage);
>  		if (ret)
>  			break;
> +	}
>  
[...]
> @@ -9854,6 +10023,7 @@ static bool io_register_op_must_quiesce(int op)
>  	switch (op) {
>  	case IORING_UNREGISTER_FILES:
>  	case IORING_REGISTER_FILES_UPDATE:
> +	case IORING_UNREGISTER_BUFFERS:

what about REGISTER_BUFFERS? 

>  	case IORING_REGISTER_PROBE:
>  	case IORING_REGISTER_PERSONALITY:
>  	case IORING_UNREGISTER_PERSONALITY:
> 

-- 
Pavel Begunkov



[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux