This patch depends on patch 1, 2, 3. It modifies the original buffer registration path to expand the one-hugepage coalescing feature to work with multi-hugepage buffers. Separated from previous patches to make it more easily reviewed. Signed-off-by: Chenliang Li <cliang01.li@xxxxxxxxxxx> --- io_uring/rsrc.c | 44 ++++++++------------------------------------ 1 file changed, 8 insertions(+), 36 deletions(-) diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index 7f95eba72f1c..70acc76ff27c 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -1044,7 +1044,7 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov, unsigned long off; size_t size; int ret, nr_pages, i; - struct folio *folio = NULL; + struct io_imu_folio_data data; *pimu = (struct io_mapped_ubuf *)&dummy_ubuf; if (!iov->iov_base) @@ -1059,30 +1059,11 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov, goto done; } - /* If it's a huge page, try to coalesce them into a single bvec entry */ - if (nr_pages > 1) { - folio = page_folio(pages[0]); - for (i = 1; i < nr_pages; i++) { - /* - * Pages must be consecutive and on the same folio for - * this to work - */ - if (page_folio(pages[i]) != folio || - pages[i] != pages[i - 1] + 1) { - folio = NULL; - break; - } - } - if (folio) { - /* - * The pages are bound to the folio, it doesn't - * actually unpin them but drops all but one reference, - * which is usually put down by io_buffer_unmap(). - * Note, needs a better helper. - */ - unpin_user_pages(&pages[1], nr_pages - 1); - nr_pages = 1; - } + /* If it's huge page(s), try to coalesce them into fewer bvec entries */ + if (io_sqe_buffer_try_coalesce(pages, nr_pages, &data)) { + ret = io_coalesced_imu_alloc(ctx, iov, pimu, last_hpage, + pages, &data); + goto done; } imu = kvmalloc(struct_size(imu, bvec, nr_pages), GFP_KERNEL); @@ -1106,10 +1087,6 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov, *pimu = imu; ret = 0; - if (folio) { - bvec_set_page(&imu->bvec[0], pages[0], size, off); - goto done; - } for (i = 0; i < nr_pages; i++) { size_t vec_len; @@ -1215,23 +1192,18 @@ int io_import_fixed(int ddir, struct iov_iter *iter, * we know that: * * 1) it's a BVEC iter, we set it up - * 2) all bvecs are PAGE_SIZE in size, except potentially the + * 2) all bvecs are the same in size, except potentially the * first and last bvec * * So just find our index, and adjust the iterator afterwards. * If the offset is within the first bvec (or the whole first * bvec, just use iov_iter_advance(). This makes it easier * since we can just skip the first segment, which may not - * be PAGE_SIZE aligned. + * be folio_size aligned. */ const struct bio_vec *bvec = imu->bvec; if (offset < bvec->bv_len) { - /* - * Note, huge pages buffers consists of one large - * bvec entry and should always go this way. The other - * branch doesn't expect non PAGE_SIZE'd chunks. - */ iter->bvec = bvec; iter->nr_segs = bvec->bv_len; iter->count -= offset; -- 2.34.1