[RFC PATCH 3/4] iov_iter: Add a scatterlist iterator type

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Add an iterator type that can iterate over a scatterlist.  This can be used
as a bridge to help convert things that take scatterlists into things that
take I/O iterators.

Signed-off-by: David Howells <dhowells@xxxxxxxxxx>
---
 include/linux/uio.h |  12 ++
 lib/iov_iter.c      | 315 +++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 321 insertions(+), 6 deletions(-)

diff --git a/include/linux/uio.h b/include/linux/uio.h
index 59a586333e1b..0e50f4af6877 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -12,6 +12,7 @@
 
 struct page;
 struct folio_queue;
+struct scatterlist;
 
 typedef unsigned int __bitwise iov_iter_extraction_t;
 
@@ -30,6 +31,7 @@ enum iter_type {
 	ITER_XARRAY,
 	ITER_DISCARD,
 	ITER_ITERLIST,
+	ITER_SCATTERLIST,
 };
 
 #define ITER_SOURCE	1	// == WRITE
@@ -46,6 +48,7 @@ struct iov_iter {
 	bool nofault;
 	bool data_source;
 	size_t iov_offset;
+	size_t orig_count;
 	/*
 	 * Hack alert: overlay ubuf_iovec with iovec + count, so
 	 * that the members resolve correctly regardless of the type
@@ -73,11 +76,13 @@ struct iov_iter {
 				struct xarray *xarray;
 				void __user *ubuf;
 				struct iov_iterlist *iterlist;
+				struct scatterlist *sglist;
 			};
 			size_t count;
 		};
 	};
 	union {
+		struct scatterlist *sglist_head;
 		unsigned long nr_segs;
 		u8 folioq_slot;
 		loff_t xarray_start;
@@ -161,6 +166,11 @@ static inline bool iov_iter_is_iterlist(const struct iov_iter *i)
 	return iov_iter_type(i) == ITER_ITERLIST;
 }
 
+/* Test whether @i's type, as reported by iov_iter_type(), is ITER_SCATTERLIST. */
+static inline bool iov_iter_is_scatterlist(const struct iov_iter *i)
+{
+	return iov_iter_type(i) == ITER_SCATTERLIST;
+}
+
 static inline unsigned char iov_iter_rw(const struct iov_iter *i)
 {
 	return i->data_source ? WRITE : READ;
@@ -317,6 +327,8 @@ void iov_iter_xarray(struct iov_iter *i, unsigned int direction, struct xarray *
 void iov_iter_iterlist(struct iov_iter *i, unsigned int direction,
 		       struct iov_iterlist *iterlist, unsigned long nr_segs,
 		       size_t count);
+void iov_iter_scatterlist(struct iov_iter *i, unsigned int direction,
+			  struct scatterlist *sglist, size_t count);
 ssize_t iov_iter_get_pages2(struct iov_iter *i, struct page **pages,
 			size_t maxsize, unsigned maxpages, size_t *start);
 ssize_t iov_iter_get_pages_alloc2(struct iov_iter *i, struct page ***pages,
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 1d9190abfeb5..ed9859af3c5d 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -562,6 +562,26 @@ static void iov_iter_folioq_advance(struct iov_iter *i, size_t size)
 	i->folioq = folioq;
 }
 
+/*
+ * Advance an ITER_SCATTERLIST iterator by @size bytes.
+ *
+ * The byte count is reduced first, then the element cursor is moved along the
+ * chain until the remaining distance falls inside an element; that distance
+ * becomes the new offset into the element.  If the chain ends before the
+ * distance is used up, a warning is emitted and the cursor is left NULL.
+ */
+static void iov_iter_scatterlist_advance(struct iov_iter *i, size_t size)
+{
+	struct scatterlist *sg;
+
+	if (!i->count)
+		return;
+	i->count -= size;
+
+	/* Measure the distance from the start of the current element. */
+	size += i->iov_offset;
+
+	/* The cursor must be reassigned on each pass or the walk never ends. */
+	for (sg = i->sglist; sg; sg = sg_next(sg)) {
+		if (likely(size < sg->length))
+			break;
+		size -= sg->length;
+	}
+	WARN_ON(!sg && size > 0);
+	i->iov_offset = size;
+	i->sglist = sg;
+}
+
 void iov_iter_advance(struct iov_iter *i, size_t size)
 {
 	if (unlikely(i->count < size))
@@ -591,6 +611,8 @@ void iov_iter_advance(struct iov_iter *i, size_t size)
 			i->iterlist++;
 			i->nr_segs--;
 		}
+	} else if (iov_iter_is_scatterlist(i)) {
+		iov_iter_scatterlist_advance(i, size);
 	}
 }
 EXPORT_SYMBOL(iov_iter_advance);
@@ -638,6 +660,15 @@ static void iov_iter_revert_iterlist(struct iov_iter *i, size_t unroll)
 	}
 }
 
+/*
+ * Rewind an ITER_SCATTERLIST iterator.
+ *
+ * There is no way to step backwards through the list, so return to the head
+ * recorded at initialisation and walk forwards again.  The distance walked is
+ * the difference between the original byte count and the current i->count.
+ *
+ * NOTE(review): this relies on the caller having already added the amount to
+ * be unrolled back onto i->count before calling - confirm against
+ * iov_iter_revert().
+ */
+static void iov_iter_revert_scatterlist(struct iov_iter *i)
+{
+	size_t skip = i->orig_count - i->count;
+
+	i->sglist = i->sglist_head;
+	/* Drop the old intra-element offset lest the advance add it to skip. */
+	i->iov_offset = 0;
+	i->count = i->orig_count;
+	iov_iter_advance(i, skip);
+}
+
 void iov_iter_revert(struct iov_iter *i, size_t unroll)
 {
 	if (!unroll)
@@ -649,6 +680,8 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll)
 		return;
 	if (unlikely(iov_iter_is_iterlist(i)))
 		return iov_iter_revert_iterlist(i, unroll);
+	if (unlikely(iov_iter_is_scatterlist(i)))
+		return iov_iter_revert_scatterlist(i);
 	if (unroll <= i->iov_offset) {
 		i->iov_offset -= unroll;
 		return;
@@ -706,6 +739,8 @@ size_t iov_iter_single_seg_count(const struct iov_iter *i)
 	if (unlikely(iov_iter_is_folioq(i)))
 		return !i->count ? 0 :
 			umin(folioq_folio_size(i->folioq, i->folioq_slot), i->count);
+	if (unlikely(iov_iter_is_scatterlist(i)))
+		return !i->sglist ? 0 : umin(i->count, i->sglist->length - i->iov_offset);
 	return i->count;
 }
 EXPORT_SYMBOL(iov_iter_single_seg_count);
@@ -856,6 +891,33 @@ void iov_iter_iterlist(struct iov_iter *iter, unsigned int direction,
 }
 EXPORT_SYMBOL(iov_iter_iterlist);
 
+/**
+ * iov_iter_scatterlist - Set up an I/O iterator over a scatterlist
+ * @iter: The iterator to be set up.
+ * @direction: The direction of the transfer (READ or WRITE).
+ * @sglist: The first element of the scatterlist to walk.
+ * @count: The number of bytes the iterator spans.
+ *
+ * Initialise @iter as an ITER_SCATTERLIST iterator positioned at the start of
+ * @sglist.  The list head and @count are also stashed in the iterator: a
+ * scatterlist may be chained and carries no back pointers, so reverting means
+ * restarting from the head and counting forwards.
+ */
+void iov_iter_scatterlist(struct iov_iter *iter, unsigned int direction,
+			  struct scatterlist *sglist, size_t count)
+{
+	WARN_ON(direction & ~(READ | WRITE));
+
+	*iter = (struct iov_iter){
+		.iter_type	= ITER_SCATTERLIST,
+		.data_source	= direction,
+		.iov_offset	= 0,
+		.orig_count	= count,
+		.count		= count,
+		.sglist_head	= sglist,
+		.sglist		= sglist,
+	};
+}
+EXPORT_SYMBOL(iov_iter_scatterlist);
+
 static bool iov_iter_aligned_iovec(const struct iov_iter *i, unsigned addr_mask,
 				   unsigned len_mask)
 {
@@ -994,6 +1056,26 @@ static unsigned long iov_iter_alignment_bvec(const struct iov_iter *i)
 	return res;
 }
 
+/*
+ * Work out the alignment of the remaining part of an ITER_SCATTERLIST
+ * iterator.
+ *
+ * Starting at the current position, OR together the starting offset and the
+ * length of each piece still covered by i->count and return the result.
+ */
+static unsigned long iov_iter_alignment_scatterlist(const struct iov_iter *i)
+{
+	struct scatterlist *sg;
+	unsigned long res = 0;
+	size_t skip = i->iov_offset;
+	size_t size = i->count;
+
+	/* Stop at the end of the data so trailing elements aren't included. */
+	for (sg = i->sglist; sg && size; sg = sg_next(sg)) {
+		size_t len = umin(sg->length - skip, size);
+
+		res |= (unsigned long)sg->offset + skip;
+		res |= len;
+		size -= len;
+		skip = 0;
+	}
+
+	return res;
+}
+
 unsigned long iov_iter_alignment(const struct iov_iter *i)
 {
 	if (likely(iter_is_ubuf(i))) {
@@ -1024,6 +1106,8 @@ unsigned long iov_iter_alignment(const struct iov_iter *i)
 			align |= iov_iter_alignment(&i->iterlist[j].iter);
 		return align;
 	}
+	if (iov_iter_is_scatterlist(i))
+		return iov_iter_alignment_scatterlist(i);
 
 	return 0;
 }
@@ -1058,13 +1142,8 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
 }
 EXPORT_SYMBOL(iov_iter_gap_alignment);
 
-static int want_pages_array(struct page ***res, size_t size,
-			    size_t start, unsigned int maxpages)
+static int __want_pages_array(struct page ***res, unsigned int count)
 {
-	unsigned int count = DIV_ROUND_UP(size + start, PAGE_SIZE);
-
-	if (count > maxpages)
-		count = maxpages;
 	WARN_ON(!count);	// caller should've prevented that
 	if (!*res) {
 		*res = kvmalloc_array(count, sizeof(struct page *), GFP_KERNEL);
@@ -1074,6 +1153,16 @@ static int want_pages_array(struct page ***res, size_t size,
 	return count;
 }
 
+/*
+ * Work out how many page pointers are needed for @size bytes beginning @start
+ * bytes into the first page, cap that at @maxpages and pass the resulting
+ * entry count on to __want_pages_array().
+ */
+static int want_pages_array(struct page ***res, size_t size,
+			    size_t start, unsigned int maxpages)
+{
+	size_t npages = DIV_ROUND_UP(size + start, PAGE_SIZE);
+
+	return __want_pages_array(res, npages > maxpages ? maxpages : npages);
+}
+
 static ssize_t iter_folioq_get_pages(struct iov_iter *iter,
 				     struct page ***ppages, size_t maxsize,
 				     unsigned maxpages, size_t *_start_offset)
@@ -1186,6 +1275,52 @@ static ssize_t iter_xarray_get_pages(struct iov_iter *i,
 	return maxsize;
 }
 
+/*
+ * Locate the page and in-page offset at the iterator's current position within
+ * the current scatterlist element.  *@size is trimmed to what is left of that
+ * element and the in-page offset is stored in *@start.  Returns NULL if there
+ * is no current element.
+ */
+static struct page *first_scatterlist_segment(const struct iov_iter *i,
+					      size_t *size, size_t *start)
+{
+	struct scatterlist *sg = i->sglist;
+	size_t remaining, pos;
+
+	if (!sg)
+		return NULL;
+
+	remaining = sg->length - i->iov_offset;
+	if (remaining < *size)
+		*size = remaining;
+
+	/* Position relative to the element's first page. */
+	pos = sg->offset + i->iov_offset;
+	*start = pos % PAGE_SIZE;
+	return sg_page(sg) + pos / PAGE_SIZE;
+}
+
+/*
+ * Get references on the pages at the current position of an ITER_SCATTERLIST
+ * iterator and advance the iterator over them.
+ *
+ * Only the current scatterlist element is used.  The page pointers are placed
+ * in *@pages (sized by want_pages_array()) and the offset into the first page
+ * is stored in *@start.
+ *
+ * Returns the number of bytes covered, -EFAULT if there is no current element
+ * or -ENOMEM if want_pages_array() reported no usable array.
+ */
+static ssize_t iter_scatterlist_get_pages(struct iov_iter *i,
+					  struct page ***pages, size_t maxsize,
+					  unsigned maxpages, size_t *start)
+{
+	struct page **p, *page;
+	unsigned int n, k;
+
+	page = first_scatterlist_segment(i, &maxsize, start);
+	if (!page)
+		return -EFAULT;
+	n = want_pages_array(pages, maxsize, *start, maxpages);
+	if (!n)
+		return -ENOMEM;
+	p = *pages;
+	for (k = 0; k < n; k++)
+		get_page(p[k] = page + k);
+	maxsize = min_t(size_t, maxsize, n * PAGE_SIZE - *start);
+	i->count -= maxsize;
+	i->iov_offset += maxsize;
+	/*
+	 * Step to the next element once this one is used up.  This has to go
+	 * through the scatterlist cursor: nr_segs shares a union with
+	 * sglist_head, so touching it would corrupt the saved list head.
+	 */
+	if (i->iov_offset == i->sglist->length) {
+		i->iov_offset = 0;
+		i->sglist = sg_next(i->sglist);
+	}
+	return maxsize;
+}
+
 /* must be done on non-empty ITER_UBUF or ITER_IOVEC one */
 static unsigned long first_iovec_segment(const struct iov_iter *i, size_t *size)
 {
@@ -1296,6 +1431,8 @@ static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i,
 		i->count -= size;
 		return size;
 	}
+	if (iov_iter_is_scatterlist(i))
+		return iter_scatterlist_get_pages(i, pages, maxsize, maxpages, start);
 	return -EFAULT;
 }
 
@@ -1379,6 +1516,25 @@ static int iterlist_npages(const struct iov_iter *i, int maxpages)
 	return npages;
 }
 
+/*
+ * Count the pages touched by the remaining data of an ITER_SCATTERLIST
+ * iterator, element by element from the current position.  The answer is
+ * capped at @maxpages.
+ */
+static int scatterlist_npages(const struct iov_iter *i, int maxpages)
+{
+	struct scatterlist *sg = i->sglist;
+	size_t left = i->count, off = i->iov_offset;
+	int total = 0;
+
+	while (sg && left) {
+		size_t seg = umin(sg->length - off, left);
+		unsigned int in_page = (sg->offset + off) % PAGE_SIZE;
+
+		total += DIV_ROUND_UP(in_page + seg, PAGE_SIZE);
+		if (unlikely(total > maxpages))
+			return maxpages;
+
+		left -= seg;
+		/* Only the first element is entered part-way through. */
+		off = 0;
+		sg = sg_next(sg);
+	}
+	return total;
+}
+
 int iov_iter_npages(const struct iov_iter *i, int maxpages)
 {
 	if (unlikely(!i->count))
@@ -1405,6 +1561,8 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages)
 	}
 	if (iov_iter_is_iterlist(i))
 		return iterlist_npages(i, maxpages);
+	if (iov_iter_is_scatterlist(i))
+		return scatterlist_npages(i, maxpages);
 	return 0;
 }
 EXPORT_SYMBOL(iov_iter_npages);
@@ -1792,6 +1950,107 @@ static ssize_t iov_iter_extract_xarray_pages(struct iov_iter *i,
 	return maxsize;
 }
 
+/*
+ * Count the pages in the run that starts at the current point of a scatterlist
+ * iterator, looking at no more than @maxsize bytes and reporting no more than
+ * @maxpages pages.  The run stops after the first element whose data does not
+ * end on a page boundary.
+ */
+static size_t count_scatterlist_contig_pages(const struct iov_iter *i,
+					     size_t maxpages, size_t maxsize)
+{
+	struct scatterlist *sg = i->sglist;
+	size_t left = umin(i->count, maxsize);
+	size_t off = i->iov_offset;
+	size_t total = 0;
+
+	while (sg && left) {
+		size_t seg = umin(sg->length - off, left);
+		/* End of this piece relative to the start of its first page. */
+		size_t end = (sg->offset + off) % PAGE_SIZE + seg;
+
+		if (!seg)
+			break;
+		left -= seg;
+		total += DIV_ROUND_UP(end, PAGE_SIZE);
+		if (unlikely(total > maxpages))
+			return maxpages;
+		if (end % PAGE_SIZE)
+			break;
+		off = 0;
+		sg = sg_next(sg);
+	}
+	return total;
+}
+
+/*
+ * Extract a list of contiguous pages from an ITER_SCATTERLIST iterator.  This
+ * does not get references on the pages, nor does it get a pin on them.
+ *
+ * The page pointers are stored in *@pages, which is set up by
+ * __want_pages_array(), and the offset into the first page is stored in
+ * *@offset0.  The iterator is advanced over the bytes extracted.
+ *
+ * Returns the number of bytes extracted, 0 if there is no element left to
+ * extract from or -ENOMEM if __want_pages_array() reported no usable array.
+ *
+ * NOTE(review): @extraction_flags is not used in this function.
+ */
+static ssize_t iov_iter_extract_scatterlist_pages(struct iov_iter *i,
+						  struct page ***pages, size_t maxsize,
+						  unsigned int maxpages,
+						  iov_iter_extraction_t extraction_flags,
+						  size_t *offset0)
+{
+	struct scatterlist *sg = i->sglist;
+	struct page **p;
+	size_t npages, skip, size = 0;
+	int nr = 0;
+
+	if (!sg)
+		return 0;
+
+	/* Step past any elements that have been completely consumed. */
+	while (skip = i->iov_offset,
+	       skip == sg->length) {
+		sg = sg_next(sg);
+		i->sglist = sg;
+		i->iov_offset = 0;
+		if (!sg)
+			return 0;
+	}
+
+	npages = count_scatterlist_contig_pages(i, maxpages, maxsize);
+
+	/* From here on, maxpages is the number of slots in the array. */
+	maxpages = __want_pages_array(pages, npages);
+	if (!maxpages)
+		return -ENOMEM;
+	*offset0 = (sg->offset + skip) & ~PAGE_MASK;
+	p = *pages;
+
+	for (sg = i->sglist; sg; sg = sg_next(sg)) {
+		struct page *page = sg_page(sg);
+		size_t part = umin(sg->length - skip, maxsize);
+		size_t off = sg->offset + skip;
+
+		if (!part)
+			break;
+
+		/* Find the first page of interest and the offset within it. */
+		page += off / PAGE_SIZE;
+		off %= PAGE_SIZE;
+
+		/* Emit one page pointer per page-sized (or smaller) chunk. */
+		do {
+			size_t chunk = umin(part, PAGE_SIZE - off);
+
+			p[nr++] = page;
+			page++;
+			maxpages--;
+			maxsize -= chunk;
+			size += chunk;
+			skip += chunk;
+			part -= chunk;
+			off = 0;
+		} while (part && maxsize && maxpages);
+
+		/* Stop at an element that doesn't end on a page boundary. */
+		if (((sg->offset + skip + part) % PAGE_SIZE) != 0)
+			break;
+		if (!maxsize || !maxpages) {
+			/*
+			 * NOTE(review): sg isn't read again after this point;
+			 * the position is updated by iov_iter_advance() below.
+			 */
+			if (!part)
+				sg = sg_next(sg);
+			break;
+		}
+		skip = 0;
+	}
+
+	iov_iter_advance(i, size);
+	return size;
+}
+
 /*
  * Extract a list of virtually contiguous pages from an ITER_BVEC iterator.
  * This does not get references on the pages, nor does it get a pin on them.
@@ -2051,6 +2310,10 @@ ssize_t iov_iter_extract_pages(struct iov_iter *i,
 		i->count -= size;
 		return size;
 	}
+	if (iov_iter_is_scatterlist(i))
+		return iov_iter_extract_scatterlist_pages(i, pages, maxsize,
+							  maxpages, extraction_flags,
+							  offset0);
 	return -EFAULT;
 }
 EXPORT_SYMBOL_GPL(iov_iter_extract_pages);
@@ -2148,6 +2411,44 @@ static size_t iterate_iterlist(struct iov_iter *iter, size_t len, void *priv, vo
 	return progress;
 }
 
+/*
+ * Handle iteration over ITER_SCATTERLIST.
+ *
+ * Each pass maps one page of the current element with kmap_local_page() and
+ * hands @step as much of it as @len, the element and the page allow.  @step
+ * returns the number of bytes it did not process; a nonzero return ends the
+ * walk once the position has been updated.  The iterator is advanced by the
+ * amount processed, which is also the return value.
+ */
+static size_t iterate_scatterlist(struct iov_iter *iter, size_t len, void *priv, void *priv2,
+				  iov_step_f step)
+{
+	struct scatterlist *sg = iter->sglist;
+	size_t progress = 0, skip = iter->iov_offset;
+
+	/* Nothing can be processed without a current element. */
+	if (!sg)
+		return 0;
+
+	do {
+		struct page *page = sg_page(sg);
+		size_t remain, consumed;
+		size_t offset = sg->offset + skip, part;
+		void *kaddr = kmap_local_page(page + offset / PAGE_SIZE);
+
+		part = min3(len,
+			   (size_t)(sg->length - skip),
+			   (size_t)(PAGE_SIZE - offset % PAGE_SIZE));
+		remain = step(kaddr + offset % PAGE_SIZE, progress, part, priv, priv2);
+		kunmap_local(kaddr);
+		consumed = part - remain;
+		len -= consumed;
+		progress += consumed;
+		skip += consumed;
+		if (skip >= sg->length) {
+			skip = 0;
+			sg = sg_next(sg);
+		}
+		if (remain)
+			break;
+		/* Stop if the list runs out early rather than dereference NULL. */
+	} while (len && sg);
+
+	iter->sglist = sg;
+	iter->iov_offset = skip;
+	iter->count -= progress;
+	return progress;
+}
+
 /*
  * Out of line iteration for iterator types that don't need such fast handling.
  */
@@ -2160,6 +2461,8 @@ size_t __iterate_and_advance2(struct iov_iter *iter, size_t len, void *priv,
 		return iterate_xarray(iter, len, priv, priv2, step);
 	if (iov_iter_is_iterlist(iter))
 		return iterate_iterlist(iter, len, priv, priv2, ustep, step);
+	if (iov_iter_is_scatterlist(iter))
+		return iterate_scatterlist(iter, len, priv, priv2, step);
 	WARN_ON(1);
 	return 0;
 }





[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Announce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux