On 6/28/24 09:44, Chenliang Li wrote:
Modify io_sqe_buffer_register to enable the coalescing for
multi-hugepage fixed buffers.
Signed-off-by: Chenliang Li <cliang01.li@xxxxxxxxxxx>
---
io_uring/rsrc.c | 47 ++++++++++++++++-------------------------------
1 file changed, 16 insertions(+), 31 deletions(-)
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index 3198cf854db1..790ed3c1bcc8 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -945,7 +945,8 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
unsigned long off;
size_t size;
int ret, nr_pages, i;
- struct folio *folio = NULL;
+ struct io_imu_folio_data data;
+ bool coalesced;
*pimu = (struct io_mapped_ubuf *)&dummy_ubuf;
if (!iov->iov_base)
@@ -960,31 +961,8 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
goto done;
}
- /* If it's a huge page, try to coalesce them into a single bvec entry */
- if (nr_pages > 1) {
- folio = page_folio(pages[0]);
- for (i = 1; i < nr_pages; i++) {
- /*
- * Pages must be consecutive and on the same folio for
- * this to work
- */
- if (page_folio(pages[i]) != folio ||
- pages[i] != pages[i - 1] + 1) {
- folio = NULL;
- break;
- }
- }
- if (folio) {
- /*
- * The pages are bound to the folio, it doesn't
- * actually unpin them but drops all but one reference,
- * which is usually put down by io_buffer_unmap().
- * Note, needs a better helper.
- */
- unpin_user_pages(&pages[1], nr_pages - 1);
- nr_pages = 1;
- }
- }
+ /* If it's huge page(s), try to coalesce them into fewer bvec entries */
+ coalesced = io_try_coalesce_buffer(&pages, &nr_pages, &data);
imu = kvmalloc(struct_size(imu, bvec, nr_pages), GFP_KERNEL);
if (!imu)
@@ -1004,17 +982,24 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
imu->nr_bvecs = nr_pages;
imu->folio_shift = PAGE_SHIFT;
imu->folio_mask = PAGE_MASK;
+ if (coalesced) {
+ imu->folio_shift = data.folio_shift;
+ imu->folio_mask = ~((1UL << data.folio_shift) - 1);
+ }
*pimu = imu;
ret = 0;
- if (folio) {
- bvec_set_page(&imu->bvec[0], pages[0], size, off);
- goto done;
- }
for (i = 0; i < nr_pages; i++) {
size_t vec_len;
- vec_len = min_t(size_t, size, PAGE_SIZE - off);
+ if (coalesced) {
+ size_t seg_size = i ? data.folio_size :
+ PAGE_SIZE * data.nr_pages_head;
When you're compacting the page array, instead of keeping a page from the
middle of the first folio as its entry, you can set that entry to the first
page of the folio and fix up the offset accordingly. Something like:
new_array[0] = compound_head(old_array[0]);
off += folio_page_idx(folio, old_array[0]) << PAGE_SHIFT;
With that change, you should be able to treat every bvec entry, including
the first one, in a uniform way without branching, e.g.:
off = (unsigned long) iov->iov_base & ~folio_mask;
vec_len = min_t(size_t, size, folio_size - off);
+
+ vec_len = min_t(size_t, size, seg_size - off);
+ } else {
+ vec_len = min_t(size_t, size, PAGE_SIZE - off);
+ }
bvec_set_page(&imu->bvec[i], pages[i], vec_len, off);
off = 0;
size -= vec_len;
--
Pavel Begunkov