On 10/24/24 7:00 AM, Christoph Hellwig wrote:
From: Ming Lei <ming.lei@xxxxxxxxxx>
The iov_iter_extract_pages interface allows to return physically
discontiguous pages, as long as all but the first and last page
in the array are page aligned and page size. Rewrite
iov_iter_extract_bvec_pages to take advantage of that instead of only
returning ranges of physically contiguous pages.
Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxx>
[hch: minor cleanups, new commit log]
Signed-off-by: Christoph Hellwig <hch@xxxxxx>
---
lib/iov_iter.c | 67 +++++++++++++++++++++++++++++++++-----------------
1 file changed, 45 insertions(+), 22 deletions(-)
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 1abb32c0da50..9fc06f5fb748 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -1677,8 +1677,8 @@ static ssize_t iov_iter_extract_xarray_pages(struct iov_iter *i,
}
/*
- * Extract a list of contiguous pages from an ITER_BVEC iterator. This does
- * not get references on the pages, nor does it get a pin on them.
+ * Extract a list of virtually contiguous pages from an ITER_BVEC iterator.
+ * This does not get references on the pages, nor does it get a pin on them.
*/
static ssize_t iov_iter_extract_bvec_pages(struct iov_iter *i,
struct page ***pages, size_t maxsize,
@@ -1686,35 +1686,58 @@ static ssize_t iov_iter_extract_bvec_pages(struct iov_iter *i,
iov_iter_extraction_t extraction_flags,
size_t *offset0)
{
- struct page **p, *page;
- size_t skip = i->iov_offset, offset, size;
- int k;
+ size_t skip = i->iov_offset, size = 0;
+ struct bvec_iter bi;
+ int k = 0;
- for (;;) {
- if (i->nr_segs == 0)
- return 0;
- size = min(maxsize, i->bvec->bv_len - skip);
- if (size)
- break;
+ if (i->nr_segs == 0)
+ return 0;
+
+ if (i->iov_offset == i->bvec->bv_len) {
i->iov_offset = 0;
i->nr_segs--;
i->bvec++;
skip = 0;
}
+ bi.bi_size = maxsize + skip;
+ bi.bi_bvec_done = skip;
+
+ maxpages = want_pages_array(pages, maxsize, skip, maxpages);
+
+ while (bi.bi_size && bi.bi_idx < i->nr_segs) {
+ struct bio_vec bv = bvec_iter_bvec(i->bvec, bi);
+
+ /*
+ * The iov_iter_extract_pages interface only allows an offset
+ * into the first page. Break out of the loop if we see an
+ * offset into subsequent pages, the caller will have to call
+ * iov_iter_extract_pages again for the reminder.
+ */
+ if (k) {
+ if (bv.bv_offset)
+ break;
+ } else {
+ *offset0 = bv.bv_offset;
+ }
- skip += i->bvec->bv_offset;
- page = i->bvec->bv_page + skip / PAGE_SIZE;
- offset = skip % PAGE_SIZE;
- *offset0 = offset;
+ (*pages)[k++] = bv.bv_page;
+ size += bv.bv_len;
- maxpages = want_pages_array(pages, size, offset, maxpages);
- if (!maxpages)
- return -ENOMEM;
- p = *pages;
- for (k = 0; k < maxpages; k++)
- p[k] = page + k;
+ if (k >= maxpages)
+ break;
+
+ /*
+ * We are done when the end of the bvec doesn't align to a page
+ * boundary as that would create a hole in the returned space.
+ * The caller will handle this with another call to
+ * iov_iter_extract_pages.
+ */
+ if (bv.bv_offset + bv.bv_len != PAGE_SIZE)
+ break;
+
+ bvec_iter_advance_single(i->bvec, &bi, bv.bv_len);
+ }
- size = min_t(size_t, size, maxpages * PAGE_SIZE - offset);
iov_iter_advance(i, size);
return size;
}
This is causing major network regression in UDP sendfile, found by syzbot.
I will release the syzbot report and this fix :
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 65ec660c2960..e19aab1fccca 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -1728,6 +1728,10 @@ static ssize_t iov_iter_extract_bvec_pages(struct
iov_iter *i,
(*pages)[k++] = bv.bv_page;
size += bv.bv_len;
+ if (size > maxsize) {
+ size = maxsize;
+ break;
+ }
if (k >= maxpages)
break;