On 5/14/24 08:54, Chenliang Li wrote:
Introduce two functions to separate the coalesced imu alloc and
accounting path from the original one. This helps to keep the original
code path clean.
Signed-off-by: Chenliang Li <cliang01.li@xxxxxxxxxxx>
---
io_uring/rsrc.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 89 insertions(+)
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index 578d382ca9bc..53fac5f27bbf 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -871,6 +871,45 @@ static int io_buffer_account_pin(struct io_ring_ctx *ctx, struct page **pages,
return ret;
}

+static int io_coalesced_buffer_account_pin(struct io_ring_ctx *ctx,
+					   struct page **pages,
+					   struct io_mapped_ubuf *imu,
+					   struct page **last_hpage,
+					   struct io_imu_folio_data *data)
+{
+	int i, j, ret;
+
+	imu->acct_pages = 0;
+	j = 0;
+	for (i = 0; i < data->nr_folios; i++) {
+		struct page *hpage = pages[j];
+
+		if (hpage == *last_hpage)
+			continue;
+		*last_hpage = hpage;
+		/*
+		 * Already checked the page array in try coalesce,
+		 * so pass in nr_pages=0 here to waive that.
+		 */
+		if (headpage_already_acct(ctx, pages, 0, hpage))
+			continue;
+		if (i) {
+			imu->acct_pages += data->nr_pages_mid;
+			j += data->nr_pages_mid;
+		} else {
+			imu->acct_pages += data->nr_pages_head;
+			j = data->nr_pages_head;
+		}

You should account an entire folio here, i.e. ->nr_pages_mid in either
case. Let's say the first page in the registration is the last page of
a huge page: you'd account only 4K, while it actually pins the entire
huge page. It seems like you can just call io_buffer_account_pin()
instead.

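Taken literally, a minimal sketch of that local fix could look like the
following; it reuses the series' io_imu_folio_data fields and the
existing headpage_already_acct()/io_account_mem() helpers, and is only
meant to illustrate the accounting, not to be the final code:

/*
 * Sketch only: charge a full folio's worth of pages for every distinct
 * head page that hasn't been accounted yet, regardless of how much of
 * the head folio the registration actually covers.
 */
static int io_coalesced_buffer_account_pin(struct io_ring_ctx *ctx,
					   struct page **pages,
					   struct io_mapped_ubuf *imu,
					   struct page **last_hpage,
					   struct io_imu_folio_data *data)
{
	int i, j = 0, ret;

	imu->acct_pages = 0;
	for (i = 0; i < data->nr_folios; i++) {
		struct page *hpage = pages[j];

		/* step to the next folio's head page up front */
		j += i ? data->nr_pages_mid : data->nr_pages_head;

		if (hpage == *last_hpage)
			continue;
		*last_hpage = hpage;
		if (headpage_already_acct(ctx, pages, 0, hpage))
			continue;
		/* the whole folio is pinned, so account all of it */
		imu->acct_pages += data->nr_pages_mid;
	}
	if (!imu->acct_pages)
		return 0;

	ret = io_account_mem(ctx, imu->acct_pages);
	if (ret)
		imu->acct_pages = 0;
	return ret;
}
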
On that note, you shouldn't duplicate the code in either case; just
treat the normal discontiguous pages case as folios of shift=PAGE_SHIFT.
Either plainly reuse or adjust io_buffer_account_pin() instead of
adding io_coalesced_buffer_account_pin(). io_coalesced_imu_alloc()
should also go away, roughly along these lines:

io_sqe_buffer_register() {
	struct io_imu_folio_data data;

	if (!io_sqe_buffer_try_coalesce(pages, &data)) {
		data.folio_shift = PAGE_SHIFT;
		...
	}
	io_buffer_account_pin(pages, &data);
	imu->data = uaddr;
	...
}

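To make the fallback branch concrete, the non-coalesced case could be
described with the same io_imu_folio_data, e.g. via a hypothetical
helper like the one below (sketch only; the helper name is made up and
the field names are taken from this series):

/*
 * Hypothetical helper, for illustration: describe nr_pages
 * discontiguous small pages as nr_pages single-page "folios", so one
 * accounting path and one bvec-filling loop can serve both cases.
 */
static void io_imu_folio_data_init_single(struct io_imu_folio_data *data,
					  int nr_pages)
{
	data->nr_folios = nr_pages;
	data->nr_pages_head = 1;
	data->nr_pages_mid = 1;
	data->folio_shift = PAGE_SHIFT;
	data->folio_size = PAGE_SIZE;
}

With that, io_sqe_buffer_register() could run the folio-aware accounting
and bvec setup unconditionally, which is what the
data.folio_shift = PAGE_SHIFT assignment in the sketch above stands for.
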
+	}
+
+	if (!imu->acct_pages)
+		return 0;
+
+	ret = io_account_mem(ctx, imu->acct_pages);
+	if (!ret)
+		return 0;
+	imu->acct_pages = 0;
+	return ret;
+}
+
static bool __io_sqe_buffer_try_coalesce(struct page **pages, int nr_pages,
					 struct io_imu_folio_data *data)
{
@@ -949,6 +988,56 @@ static bool io_sqe_buffer_try_coalesce(struct page **pages, int nr_pages,
return true;
}

+static int io_coalesced_imu_alloc(struct io_ring_ctx *ctx, struct iovec *iov,
+				  struct io_mapped_ubuf **pimu,
+				  struct page **last_hpage, struct page **pages,
+				  struct io_imu_folio_data *data)
+{
+	struct io_mapped_ubuf *imu = NULL;
+	unsigned long off;
+	size_t size, vec_len;
+	int ret, i, j;
+
+	ret = -ENOMEM;
+	imu = kvmalloc(struct_size(imu, bvec, data->nr_folios), GFP_KERNEL);
+	if (!imu)
+		return ret;
+
+	ret = io_coalesced_buffer_account_pin(ctx, pages, imu, last_hpage,
+					      data);
+	if (ret) {
+		unpin_user_page(pages[0]);
+		j = data->nr_pages_head;
+		for (i = 1; i < data->nr_folios; i++) {
+			unpin_user_page(pages[j]);
+			j += data->nr_pages_mid;
+		}
+		return ret;
+	}
+	off = (unsigned long) iov->iov_base & ~PAGE_MASK;
+	size = iov->iov_len;
+	/* store original address for later verification */
+	imu->ubuf = (unsigned long) iov->iov_base;
+	imu->ubuf_end = imu->ubuf + iov->iov_len;
+	imu->nr_bvecs = data->nr_folios;
+	imu->folio_shift = data->folio_shift;
+	imu->folio_mask = ~((1UL << data->folio_shift) - 1);
+	*pimu = imu;
+	ret = 0;
+
+	vec_len = min_t(size_t, size, PAGE_SIZE * data->nr_pages_head - off);
+	bvec_set_page(&imu->bvec[0], pages[0], vec_len, off);
+	size -= vec_len;
+	j = data->nr_pages_head;
+	for (i = 1; i < data->nr_folios; i++) {
+		vec_len = min_t(size_t, size, data->folio_size);
+		bvec_set_page(&imu->bvec[i], pages[j], vec_len, 0);
+		size -= vec_len;
+		j += data->nr_pages_mid;
+	}
+	return ret;
+}
+
static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
				  struct io_mapped_ubuf **pimu,
				  struct page **last_hpage)
--
Pavel Begunkov