On 18/12/2020 18:07, Bijan Mottahedeh wrote:
> Apply fixed_rsrc functionality for fixed buffers support.

git generated a pretty messy diff...

Because registration currently does a full quiesce, fixed read/write
can access buffers from asynchronous contexts without synchronisation.
That won't work anymore, so:

1. either we save the buffer in advance, but that would require an
extra req_async allocation for linked fixed rw;
2. or we synchronise whenever we go async. But that would mean a
request may look up and do IO on two different buffers, and that's
rotten;
3. or do it mixed -- look up lazily, but if we do IO, then allocate.
3.5 Also, "synchronise" there would mean taking uring_lock, which is
not welcome, but we can probably do RCU.

Let me think of a patch...

[...]

> @@ -8373,7 +8433,13 @@ static bool headpage_already_acct(struct io_ring_ctx *ctx, struct page **pages,
> 
>  	/* check previously registered pages */
>  	for (i = 0; i < ctx->nr_user_bufs; i++) {
> -		struct io_mapped_ubuf *imu = &ctx->user_bufs[i];
> +		struct fixed_rsrc_table *table;
> +		struct io_mapped_ubuf *imu;
> +		unsigned int index;
> +
> +		table = &ctx->buf_data->table[i >> IORING_BUF_TABLE_SHIFT];
> +		index = i & IORING_BUF_TABLE_MASK;
> +		imu = &table->bufs[index];

io_buf_from_index() may take buf_data, so it can be reused here (see
the sketch below the diff).

> 
>  		for (j = 0; j < imu->nr_bvecs; j++) {
>  			if (!PageCompound(imu->bvec[j].bv_page))

> @@ -8508,19 +8574,79 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
>  	return ret;
>  }
> 
> -static int io_buffers_map_alloc(struct io_ring_ctx *ctx, unsigned int nr_args)
> +static void io_free_buf_tables(struct fixed_rsrc_data *buf_data,
> +			       unsigned int nr_tables)
>  {
> -	if (ctx->user_bufs)
> -		return -EBUSY;
> -	if (!nr_args || nr_args > UIO_MAXIOV)
> -		return -EINVAL;
> +	int i;
> 
> -	ctx->user_bufs = kcalloc(nr_args, sizeof(struct io_mapped_ubuf),
> -				 GFP_KERNEL);
> -	if (!ctx->user_bufs)
> -		return -ENOMEM;
> +	for (i = 0; i < nr_tables; i++) {
> +		struct fixed_rsrc_table *table = &buf_data->table[i];
> 
> -	return 0;
> +		kfree(table->bufs);
> +	}
> +}
> +
> +static int io_alloc_buf_tables(struct fixed_rsrc_data *buf_data,
> +			       unsigned int nr_tables, unsigned int nr_bufs)
> +{
> +	int i;
> +

trailing tabs on the blank line above

> +	for (i = 0; i < nr_tables; i++) {
> +		struct fixed_rsrc_table *table = &buf_data->table[i];
> +		unsigned int this_bufs;
> +
> +		this_bufs = min(nr_bufs, IORING_MAX_BUFS_TABLE);
> +		table->bufs = kcalloc(this_bufs, sizeof(struct io_mapped_ubuf),
> +				      GFP_KERNEL);
> +		if (!table->bufs)
> +			break;
> +		nr_bufs -= this_bufs;
> +	}
> +
> +	if (i == nr_tables)
> +		return 0;
> +
> +	io_free_buf_tables(buf_data, nr_tables);

This happens to work only because kcalloc() zeroed buf_data->table, so
the extra iterations kfree() NULL pointers. It should be
io_free_buf_tables(buf_data, i);

> +	return 1;
> +}
> +

[...]
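Something like the sketch below could then be shared by all the places
that open-code this shift/mask lookup. Completely untested, and
assuming the helper can be made to take the fixed_rsrc_data directly:

static struct io_mapped_ubuf *io_buf_from_index(struct fixed_rsrc_data *buf_data,
						unsigned int index)
{
	struct fixed_rsrc_table *table;

	/* same split as in the hunks above and below */
	table = &buf_data->table[index >> IORING_BUF_TABLE_SHIFT];
	return &table->bufs[index & IORING_BUF_TABLE_MASK];
}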
>  static int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
>  				    unsigned int nr_args)
>  {
>  	int i, ret;
>  	struct iovec iov;
>  	struct page *last_hpage = NULL;
> +	struct fixed_rsrc_ref_node *ref_node;
> +	struct fixed_rsrc_data *buf_data;
> 
> -	ret = io_buffers_map_alloc(ctx, nr_args);
> -	if (ret)
> -		return ret;
> +	if (ctx->nr_user_bufs)
> +		return -EBUSY;
> 
> -	for (i = 0; i < nr_args; i++) {
> -		struct io_mapped_ubuf *imu = &ctx->user_bufs[i];
> +	buf_data = io_buffers_map_alloc(ctx, nr_args);
> +	if (IS_ERR(buf_data))
> +		return PTR_ERR(buf_data);
> 
> +	for (i = 0; i < nr_args; i++, ctx->nr_user_bufs++) {
> +		struct fixed_rsrc_table *table;
> +		struct io_mapped_ubuf *imu;
> +		unsigned int index;
> 
>  		ret = io_copy_iov(ctx, &iov, arg, i);
>  		if (ret)
>  			break;
> 
> +		/* allow sparse sets */
> +		if (!iov.iov_base && !iov.iov_len)
> +			continue;
> +
>  		ret = io_buffer_validate(&iov);
>  		if (ret)
>  			break;
> 
> +		table = &buf_data->table[i >> IORING_BUF_TABLE_SHIFT];

Same here, io_buf_from_index() can be reused.

> +		index = i & IORING_BUF_TABLE_MASK;
> +		imu = &table->bufs[index];
> +
>  		ret = io_sqe_buffer_register(ctx, &iov, imu, &last_hpage);
>  		if (ret)
>  			break;
> +	}
> 

[...]

> @@ -9854,6 +10023,7 @@ static bool io_register_op_must_quiesce(int op)
>  	switch (op) {
>  	case IORING_UNREGISTER_FILES:
>  	case IORING_REGISTER_FILES_UPDATE:
> +	case IORING_UNREGISTER_BUFFERS:

What about REGISTER_BUFFERS? See below.

>  	case IORING_REGISTER_PROBE:
>  	case IORING_REGISTER_PERSONALITY:
>  	case IORING_UNREGISTER_PERSONALITY:
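If unregistering buffers can skip the full quiesce here, I'd expect
registering them to be able to skip it as well. Untested, but I mean
something like:

	switch (op) {
	case IORING_UNREGISTER_FILES:
	case IORING_REGISTER_FILES_UPDATE:
	case IORING_REGISTER_BUFFERS:
	case IORING_UNREGISTER_BUFFERS:
	case IORING_REGISTER_PROBE:
	case IORING_REGISTER_PERSONALITY:
	case IORING_UNREGISTER_PERSONALITY:
		return false;
	default:
		return true;
	}

-- 
Pavel Begunkov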