On Tue, May 14, 2024 at 1:25 PM Chenliang Li <cliang01.li@xxxxxxxxxxx> wrote: > > Modify the original buffer registration and enable coalescing for > buffers that span more than one hugepage. > > Signed-off-by: Chenliang Li <cliang01.li@xxxxxxxxxxx> > --- > io_uring/rsrc.c | 44 ++++++++------------------------------------ > 1 file changed, 8 insertions(+), 36 deletions(-) > > diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c > index 53fac5f27bbf..5e5c1d6f3501 100644 > --- a/io_uring/rsrc.c > +++ b/io_uring/rsrc.c > @@ -1047,7 +1047,7 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov, > unsigned long off; > size_t size; > int ret, nr_pages, i; > - struct folio *folio = NULL; > + struct io_imu_folio_data data; > > *pimu = (struct io_mapped_ubuf *)&dummy_ubuf; > if (!iov->iov_base) > @@ -1062,30 +1062,11 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov, > goto done; > } > > - /* If it's a huge page, try to coalesce them into a single bvec entry */ > - if (nr_pages > 1) { > - folio = page_folio(pages[0]); > - for (i = 1; i < nr_pages; i++) { > - /* > - * Pages must be consecutive and on the same folio for > - * this to work > - */ > - if (page_folio(pages[i]) != folio || > - pages[i] != pages[i - 1] + 1) { > - folio = NULL; > - break; > - } > - } > - if (folio) { > - /* > - * The pages are bound to the folio, it doesn't > - * actually unpin them but drops all but one reference, > - * which is usually put down by io_buffer_unmap(). > - * Note, needs a better helper. 
> - */ > - unpin_user_pages(&pages[1], nr_pages - 1); > - nr_pages = 1; > - } > + /* If it's huge page(s), try to coalesce them into fewer bvec entries */ > + if (io_sqe_buffer_try_coalesce(pages, nr_pages, &data)) { > + ret = io_coalesced_imu_alloc(ctx, iov, pimu, last_hpage, > + pages, &data); > + goto done; > } > > imu = kvmalloc(struct_size(imu, bvec, nr_pages), GFP_KERNEL); > @@ -1109,10 +1090,6 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov, > *pimu = imu; > ret = 0; > > - if (folio) { > - bvec_set_page(&imu->bvec[0], pages[0], size, off); > - goto done; > - } > for (i = 0; i < nr_pages; i++) { > size_t vec_len; > > @@ -1218,23 +1195,18 @@ int io_import_fixed(int ddir, struct iov_iter *iter, > * we know that: > * > * 1) it's a BVEC iter, we set it up > - * 2) all bvecs are PAGE_SIZE in size, except potentially the > + * 2) all bvecs are the same in size, except potentially the > * first and last bvec > * > * So just find our index, and adjust the iterator afterwards. > * If the offset is within the first bvec (or the whole first > * bvec, just use iov_iter_advance(). This makes it easier > * since we can just skip the first segment, which may not > - * be PAGE_SIZE aligned. > + * be folio_size aligned. > */ > const struct bio_vec *bvec = imu->bvec; > > if (offset < bvec->bv_len) { > - /* > - * Note, huge pages buffers consists of one large > - * bvec entry and should always go this way. The other > - * branch doesn't expect non PAGE_SIZE'd chunks. > - */ > iter->bvec = bvec; > iter->nr_segs = bvec->bv_len; > iter->count -= offset; > -- > 2.34.1 > > Looks good. Reviewed-by: Anuj Gupta <anuj20.g@xxxxxxxxxxx> -- Anuj Gupta