[RFC][PATCH] iov_iter: Add extraction functions

Hi Al, Jeff,

Here's a replacement for the extract_iter_to_iter() patch I had previously.
It's a WIP: some bits aren't fully implemented, though others I have tested
and got working.  Could you take a look and see if you're okay with the
interface?

I think I've addressed most of Al's comments.  Page-pinning is now conditional
on the iterator type, and a number of the iterator types just extract to an
iterator of the same type.  It should now handle KVEC-class iterators that
refer to vmalloc'd data.

I've also added extraction to scatterlist (which I'll need for doing various
crypto things) and extraction to ib_sge, which could be used in cifs/smb RDMA
to bypass the conversion-to-scatterlist step.

As mentioned, there are bits that aren't fully implemented, let alone tested.

David
---
iov_iter: Add extraction functions

Add extraction functions to extract the page content from an I/O iterator
to one of three destinations:

 (1) extract_iter_to_iter()

     Builds a new iterator from the source iterator such that the new
     iterator remains valid even if the source iterator is deallocated, as
     can happen in asynchronous I/O when -EIOCBQUEUED is returned.

     For UBUF/IOVEC-class iterators the output iterator will be BVEC-class
     and the data/buffer pages are pinned to prevent them from being moved,
     swapped out or discarded for the duration.

     For XARRAY-class iterators, the new iterator is copied and then
     trimmed; no page pinning is done.  For BVEC- and KVEC-class iterators,
     the bio_vec/kvec table is copied and trimmed; again no page pinning.

     KVEC-class iterators with vmalloc'd areas should work.

     DISCARD- and PIPE-class iterators are not currently supported and
     incur an error.

     The extraction function fills out a "cleanup" record that can then be
     passed to iov_iter_clean_up() once the I/O is complete.  This will undo
     any pinning and free any allocated bufferage.  (A usage sketch follows
     the list below.)

     Tested with DIO read: IOVEC, UBUF.
     Tested just dumping iterator: BVEC, KVEC.
     Untested: XARRAY.

 (2) extract_iter_to_sg()

     Similar to the above, but builds a scatterlist and attaches it to an
     sg_table instead of building a new iterator.  It returns an indication
     of whether the pages were pinned.

     The caller is responsible for freeing the scatterlist and unpinning
     any pages (see the sketch below the list).

     Tested to dump sglist: IOVEC, UBUF, KVEC.
     Untested: BVEC, XARRAY.

 (3) extract_iter_to_rdma()

     Similar again, but fills in an ib_sge struct array, mapping the pages
     to DMA addresses appropriate to the specified device.

     The caller provides the ib_sge array, so it's up to the caller to
     clean that up.  No page-pinning is done as ib_sge does not provide a
     place to note the source page(s), so only BVEC-, KVEC- and
     XARRAY-class iterators are supported for now.  One possible solution
     to that is to pass the iterator through extract_iter_to_iter() first.

     Completely untested and needs more modification to deal with errors
     from ib_dma_map_page().

Note that for the moment, none of these advance the source iterator.
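
As a usage sketch (a hypothetical caller, not part of this patch), I'd expect
extract_iter_to_iter() to be used along these lines:

	struct iov_iter_cleanup cleanup;
	struct iov_iter to;
	ssize_t ret;

	ret = extract_iter_to_iter(iter, count, &to, &cleanup);
	if (ret < 0)
		return ret;
	/* ... submit the I/O described by 'to'; the source iterator can
	 * now go away ...
	 */
	iov_iter_clean_up(&cleanup);	/* Undo pinning, free the bvec/kvec */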

An additional function, iov_iter_flush_dcache(), is provided to do dcache
flushing over the source buffer, using the information provided in the
extracted iterator and the cleanup record.  This works for BVEC-class
iterators, but is incompletely implemented for KVEC-class and probably
wrong for XARRAY-class.
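
And for extract_iter_to_sg(), roughly this (the put_page() loop mirrors what
iov_iter_clean_up() does for pinned pages; error unwinding elided):

	struct sg_table sgtable;
	struct scatterlist *sg;
	bool pinned;
	ssize_t ret;
	int i;

	ret = extract_iter_to_sg(iter, count, &sgtable, &pinned);
	if (ret < 0)
		return ret;
	/* ... pass sgtable.sgl to the crypto or DMA layer ... */
	if (pinned)
		for_each_sg(sgtable.sgl, sg, sgtable.nents, i)
			put_page(sg_page(sg));
	kvfree(sgtable.sgl);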

Signed-off-by: David Howells <dhowells@xxxxxxxxxx>
---

diff --git a/include/linux/uio2.h b/include/linux/uio2.h
new file mode 100644
index 000000000000..bc3e5ea96de7
--- /dev/null
+++ b/include/linux/uio2.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* iov_iter extractors
+ *
+ * Copyright (C) 2022 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@xxxxxxxxxx)
+ */
+
+#ifndef _LINUX_UIO2_H
+#define _LINUX_UIO2_H
+
+#include <linux/uio.h>
+#include <linux/dma-direction.h>
+
+struct sg_table;
+struct ib_device;
+struct ib_sge;
+
+/*
+ * Cleanup information for an extracted iterator.
+ */
+struct iov_iter_cleanup {
+	union {
+		struct bio_vec	*bv;
+		struct kvec	*kv;
+	};
+	unsigned int	nr_segs;
+	bool		pages_pinned;
+	enum iter_type	type:8;
+};
+
+ssize_t extract_iter_to_iter(struct iov_iter *iter, size_t len,
+			     struct iov_iter *to,
+			     struct iov_iter_cleanup *cleanup);
+ssize_t extract_iter_to_sg(struct iov_iter *iter, size_t len,
+			   struct sg_table *sgtable, bool *pages_pinned);
+ssize_t extract_iter_to_rdma(struct iov_iter *iter, size_t len,
+			     struct ib_device *device, u32 local_dma_lkey,
+			     enum dma_data_direction direction,
+			     struct ib_sge *sge, unsigned int max_sge,
+			     unsigned int *nr_sge);
+void iov_iter_flush_dcache(struct iov_iter *iter,
+			   struct iov_iter_cleanup *cleanup);
+void iov_iter_clean_up(struct iov_iter_cleanup *cleanup);
+
+#endif /* _LINUX_UIO2_H */
diff --git a/lib/Makefile b/lib/Makefile
index 5927d7fa0806..7d46c1a68322 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -44,7 +44,7 @@ obj-y	+= lockref.o
 
 obj-y += bcd.o sort.o parser.o debug_locks.o random32.o \
 	 bust_spinlocks.o kasprintf.o bitmap.o scatterlist.o \
-	 list_sort.o uuid.o iov_iter.o clz_ctz.o \
+	 list_sort.o uuid.o iov_iter.o iov_iter_extract.o clz_ctz.o \
 	 bsearch.o find_bit.o llist.o memweight.o kfifo.o \
 	 percpu-refcount.o rhashtable.o base64.o \
 	 once.o refcount.o usercopy.o errseq.o bucket_locks.o \
diff --git a/lib/iov_iter_extract.c b/lib/iov_iter_extract.c
new file mode 100644
index 000000000000..eec7287ce779
--- /dev/null
+++ b/lib/iov_iter_extract.c
@@ -0,0 +1,657 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Extract page list from an iterator and attach it to a scatter list, an RDMA
+ * SGE array or another iterator.
+ *
+ * Copyright (C) 2022 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@xxxxxxxxxx)
+ */
+
+#include <linux/export.h>
+#include <linux/bvec.h>
+#include <linux/uio.h>
+#include <linux/uio2.h>
+#include <linux/pagemap.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/scatterlist.h>
+#include <rdma/ib_verbs.h>
+
+enum iter_extract_dest {
+	EXTRACT_TO_BVEC,
+	EXTRACT_TO_SGLIST,
+	EXTRACT_TO_RDMA,
+};
+
+struct extract_to_rdma {
+	struct ib_sge		*sge;
+	unsigned int		*nr_sge;
+	struct ib_device	*device;
+	u32			local_dma_lkey;
+	enum dma_data_direction	direction;
+};
+
+/*
+ * When we're extracting lists of pages, we can avoid having to do a second
+ * allocation by putting the list of extracted pages overlapping the end of the
+ * array.  As long as the array elements are larger than page pointers, and as
+ * long as we work 0->last, the two shouldn't interfere.
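+ *
+ * For instance, with bio_vec elements (16 bytes on 64-bit), the N page
+ * pointers (8 bytes each) occupy the last half of an N-element array, and
+ * element i is only written once page i has been consumed.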
+ */
+static struct page **locate_pages_array(void *array, unsigned int array_max,
+					enum iter_extract_dest dest)
+{
+	void *p;
+	size_t arr_size, pg_size;
+
+	switch (dest) {
+	case EXTRACT_TO_BVEC: {
+		struct iov_iter *to = array;
+
+		arr_size = array_size(array_max, sizeof(struct bio_vec));
+		p = (void *)to->bvec;
+		break;
+	}
+	case EXTRACT_TO_SGLIST: {
+		struct sg_table *sgtable = array;
+
+		arr_size = array_size(array_max, sizeof(struct scatterlist));
+		p = sgtable->sgl;
+		break;
+	}
+	case EXTRACT_TO_RDMA: {
+		struct extract_to_rdma *rdma = array;
+
+		arr_size = array_size(array_max, sizeof(struct ib_sge));
+		p = rdma->sge;
+		break;
+	}
+	}
+
+	pg_size = array_size(array_max, sizeof(struct page *));
+	return (void *)p + arr_size - pg_size;
+}
+
+/*
+ * Attach a segment of a contiguous span of pages to a single buffer segment.
+ */
+static int extract_contig_pages(void *array, struct page *lowest_page,
+				unsigned long off, size_t len,
+				enum iter_extract_dest dest)
+{
+	switch (dest) {
+	case EXTRACT_TO_BVEC: {
+		struct iov_iter *to = array;
+		struct bio_vec *bv = (struct bio_vec *)&to->bvec[to->nr_segs++];
+
+		bv->bv_page = lowest_page;
+		bv->bv_len = len;
+		bv->bv_offset = off;
+		to->count += len;
+		return to->nr_segs;
+	}
+	case EXTRACT_TO_SGLIST: {
+		struct sg_table *sgtable = array;
+		struct scatterlist *sg = &sgtable->sgl[sgtable->nents++];
+
+		sg_set_page(sg, lowest_page, len, off);
+		return sgtable->nents;
+	}
+	case EXTRACT_TO_RDMA: {
+		struct extract_to_rdma *rdma = array;
+		struct ib_sge *sge = &rdma->sge[*rdma->nr_sge];
+
+		sge->addr = ib_dma_map_page(rdma->device, lowest_page,
+					    off, len, rdma->direction);
+		if (ib_dma_mapping_error(rdma->device, sge->addr)) {
+			sge->addr = 0;
+			return -EIO;
+		}
+		sge->length = len;
+		sge->lkey   = rdma->local_dma_lkey;
+		*rdma->nr_sge += 1;
+		return *rdma->nr_sge;
+	}
+	}
+
+	BUG();
+}
+
+static unsigned int extract_page_list(void *array,
+				      struct page **pages, unsigned int nr_pages,
+				      unsigned long off, size_t len,
+				      enum iter_extract_dest dest)
+{
+	struct page *page;
+	unsigned int ret = 0;
+	size_t seg;
+	int i;
+
+	for (i = 0; i < nr_pages; i++) {
+		seg = min_t(size_t, PAGE_SIZE - off, len);
+		page = *pages;
+		*pages++ = NULL;
+		ret = extract_contig_pages(array, page, off, seg, dest);
+		len -= seg;
+		off = 0;
+	}
+
+	return ret;
+}
+
+static void terminate_array(void *array, enum iter_extract_dest dest)
+{
+	if (dest == EXTRACT_TO_SGLIST) {
+		struct sg_table *sgtable = array;
+		struct scatterlist *sg = sgtable->sgl + sgtable->nents - 1;
+
+		sgtable->orig_nents = sgtable->nents;
+		if (sgtable->nents)
+			sg_mark_end(sg);
+	}
+}
+
+/*
+ * Extract and pin the pages from UBUF- or IOVEC-class iterators and add them
+ * to the destination buffer.
+ */
+static ssize_t iov_iter_extract_user(struct iov_iter *iter,
+				     void *array, unsigned int array_max,
+				     ssize_t maxsize,
+				     enum iter_extract_dest dest)
+{
+	const struct iovec *iov;
+	struct iovec ubuf;
+	struct page **pages;
+	unsigned long uaddr, start = iter->iov_offset;
+	unsigned int i = 0, ix = 0, gup_flags = 0, nr_segs, n;
+	ssize_t ret = 0;
+	size_t len, off;
+	int res;
+
+	pages = locate_pages_array(array, array_max, dest);
+
+	if (iov_iter_rw(iter) != WRITE)
+		gup_flags |= FOLL_WRITE;
+	if (iter->nofault)
+		gup_flags |= FOLL_NOFAULT;
+
+	if (iter_is_ubuf(iter)) {
+		ubuf.iov_base = iter->ubuf;
+		ubuf.iov_len = iov_iter_count(iter) + iter->iov_offset;
+		iov = &ubuf;
+		nr_segs = 1;
+	} else {
+		iov = iter->iov;
+		nr_segs = iter->nr_segs;
+	}
+
+	do {
+		len = iov[i].iov_len;
+		if (start >= len) {
+			start -= len;
+			i++;
+			if (i >= nr_segs)
+				break;
+			continue;
+		}
+
+		uaddr = (unsigned long)iov[i].iov_base + start;
+		len = min_t(size_t, maxsize, len - start);
+		off = uaddr & ~PAGE_MASK;
+		uaddr &= PAGE_MASK;
+
+		n = DIV_ROUND_UP(len + off, PAGE_SIZE);
+		n = min(n, array_max - ix);
+
+		res = get_user_pages_fast(uaddr, n, gup_flags, pages + ix);
+		if (unlikely(res <= 0)) {
+			if (res < 0)
+				return res;
+			break;
+		}
+
+		len = min_t(size_t, len, res * PAGE_SIZE - off);
+		maxsize -= len;
+		start += len;
+		ret += len;
+		ix = extract_page_list(array, pages + ix, res, off, len, dest);
+	} while (maxsize > 0 && ix < array_max);
+
+	terminate_array(array, dest);
+	return ret;
+}
+
+/*
+ * Extract the pages from a BVEC-class iterator and add them to the destination
+ * buffer.  The pages are not pinned.
+ */
+static ssize_t iov_iter_extract_bvec(struct iov_iter *iter,
+				     void *array, unsigned int array_max,
+				     ssize_t maxsize,
+				     enum iter_extract_dest dest)
+{
+	const struct bio_vec *bv = iter->bvec;
+	unsigned long start = iter->iov_offset;
+	unsigned int i, ix;
+	ssize_t ret = 0;
+
+	for (i = 0; i < iter->nr_segs; i++) {
+		size_t off, len;
+
+		len = bv[i].bv_len;
+		if (start >= len) {
+			start -= len;
+			continue;
+		}
+
+		len = min_t(size_t, maxsize, len - start);
+		off = bv[i].bv_offset + start;
+
+		maxsize -= len;
+		ret += len;
+		ix = extract_contig_pages(array, bv[i].bv_page, off, len,
+					  dest);
+		if (maxsize <= 0 || ix >= array_max)
+			break;
+		start = 0;
+	}
+
+	terminate_array(array, dest);
+	return ret;
+}
+
+/*
+ * Extract the pages from a KVEC-class iterator and add them to the destination
+ * buffer.  This can deal with vmalloc'd buffers as well as kmalloc'd or static
+ * buffers.  The pages are not pinned.
+ */
+static ssize_t iov_iter_extract_kvec(struct iov_iter *iter,
+				     void *array, unsigned int array_max,
+				     ssize_t maxsize,
+				     enum iter_extract_dest dest)
+{
+	const struct kvec *kv = iter->kvec;
+	unsigned long start = iter->iov_offset;
+	unsigned int i, ix;
+	ssize_t ret = 0;
+
+	for (i = 0; i < iter->nr_segs; i++) {
+		struct page *page;
+		unsigned long kaddr;
+		size_t off, len, seg;
+
+		len = kv[i].iov_len;
+		if (start >= len) {
+			start -= len;
+			continue;
+		}
+
+		kaddr = (unsigned long)kv[i].iov_base + start;
+		off = kaddr & ~PAGE_MASK;
+		len = min_t(size_t, maxsize, len - start);
+		kaddr &= PAGE_MASK;
+
+		maxsize -= len;
+		ret += len;
+		do {
+			seg = min_t(size_t, len, PAGE_SIZE - off);
+			if (is_vmalloc_or_module_addr((void *)kaddr))
+				page = vmalloc_to_page((void *)kaddr);
+			else
+				page = virt_to_page(kaddr);
+			ix = extract_contig_pages(array, page, off, seg, dest);
+			len -= seg;
+			kaddr += PAGE_SIZE;
+			off = 0;
+		} while (len > 0 && ix < array_max);
+		if (maxsize <= 0 || ix >= array_max)
+			break;
+		start = 0;
+	}
+
+	terminate_array(array, dest);
+	return ret;
+}
+
+/*
+ * Extract the pages from an XARRAY-class iterator and add them to the
+ * destination buffer.  The pages are not pinned.
+ */
+static ssize_t iov_iter_extract_xarray(struct iov_iter *iter,
+				       void *array, unsigned int array_max,
+				       ssize_t maxsize,
+				       enum iter_extract_dest dest)
+{
+	struct xarray *xa = iter->xarray;
+	struct folio *folio;
+	unsigned int ix;
+	loff_t start = iter->xarray_start + iter->iov_offset;
+	pgoff_t index = start / PAGE_SIZE;
+	ssize_t ret = 0;
+	size_t offset, len;
+	XA_STATE(xas, xa, index);
+
+	rcu_read_lock();
+	xas_for_each(&xas, folio, ULONG_MAX) {
+		if (xas_retry(&xas, folio))
+			continue;
+		if (WARN_ON(xa_is_value(folio)))
+			break;
+		if (WARN_ON(folio_test_hugetlb(folio)))
+			break;
+
+		offset = offset_in_folio(folio, start);
+		len = min_t(size_t, maxsize, folio_size(folio) - offset);
+
+		ix = extract_contig_pages(array, folio_page(folio, 0),
+					  offset, len, dest);
+		maxsize -= len;
+		ret += len;
+		start += len;
+		if (ix >= array_max) {
+			WARN_ON_ONCE(ix > array_max);
+			break;
+		}
+
+		if (maxsize <= 0)
+			break;
+	}
+
+	rcu_read_unlock();
+	terminate_array(array, dest);
+	return ret;
+}
+
+static ssize_t iov_iter_extract_pages(struct iov_iter *iter,
+				      void *array, unsigned int array_max,
+				      size_t maxsize,
+				      enum iter_extract_dest dest)
+{
+	if (likely(user_backed_iter(iter)))
+		return iov_iter_extract_user(iter, array, array_max, maxsize,
+					     dest);
+	if (iov_iter_is_bvec(iter))
+		return iov_iter_extract_bvec(iter, array, array_max, maxsize,
+					     dest);
+	if (iov_iter_is_kvec(iter))
+		return iov_iter_extract_kvec(iter, array, array_max, maxsize,
+					     dest);
+	if (iov_iter_is_xarray(iter))
+		return iov_iter_extract_xarray(iter, array, array_max, maxsize,
+					       dest);
+	if (iov_iter_is_pipe(iter)) {
+		pr_warn("extract pipe unsupported\n");
+		return -EIO;
+	}
+
+	pr_warn("extract other-type unsupported\n");
+	return -EFAULT;
+}
+
+/**
+ * extract_iter_to_iter - Extract the pages from an iterator into another iterator
+ * @iter: The iterator to extract from
+ * @len: The amount of iterator to copy
+ * @to: The iterator to fill in
+ * @cleanup: Information on how to clean up the resulting iterator
+ *
+ * Extract the page fragments from the given amount of the source iterator and
+ * build up an iterator that refers to all of those bits.  This allows the
+ * source iterator to be disposed of.
+ *
+ * UBUF- and IOVEC-class iterators are extracted to BVEC-class iterators and
+ * the extracted pages are pinned; BVEC-, KVEC- and XARRAY-class are extracted
+ * as the same type and truncated with no pinning; PIPE- and DISCARD-class are
+ * not supported.
+ */
+ssize_t extract_iter_to_iter(struct iov_iter *iter, size_t len,
+			     struct iov_iter *to,
+			     struct iov_iter_cleanup *cleanup)
+{
+	struct bio_vec *bv;
+	unsigned int bv_max;
+	ssize_t ret;
+	size_t bv_size;
+
+	memset(cleanup, 0, sizeof(*cleanup));
+
+	cleanup->type = iov_iter_type(iter);
+	switch (iov_iter_type(iter)) {
+	case ITER_KVEC:
+		cleanup->kv = (void *)dup_iter(to, iter, GFP_KERNEL);
+		if (!cleanup->kv)
+			return -ENOMEM;
+		cleanup->nr_segs = to->nr_segs;
+		iov_iter_truncate(to, len);
+		return iov_iter_count(to);
+	case ITER_XARRAY:
+		*to = *iter;
+		iov_iter_truncate(to, len);
+		return iov_iter_count(to);
+
+	case ITER_UBUF:
+	case ITER_IOVEC:
+		cleanup->pages_pinned = true;
+		fallthrough;
+	case ITER_BVEC:
+		bv_max = iov_iter_npages(iter, INT_MAX);
+		bv_size = array_size(bv_max, sizeof(*bv));
+		bv = kvmalloc(bv_size, GFP_KERNEL);
+		if (!bv)
+			return -ENOMEM;
+
+		iov_iter_bvec(to, iov_iter_rw(iter), bv, 0, 0);
+		ret = iov_iter_extract_pages(iter, to, bv_max, len,
+					     EXTRACT_TO_BVEC);
+		cleanup->type = ITER_BVEC;
+		cleanup->bv = bv;
+		cleanup->nr_segs = to->nr_segs;
+		if (ret < 0) {
+			/* Unpin/free anything extracted before the error. */
+			iov_iter_clean_up(cleanup);
+			return ret;
+		}
+		return ret;
+
+	case ITER_DISCARD:
+	case ITER_PIPE:
+	default:
+		WARN_ON_ONCE(1);
+		return -EIO;
+	}
+}
+EXPORT_SYMBOL(extract_iter_to_iter);
+
+/**
+ * extract_iter_to_sg - Extract the pages from an iterator into an sglist
+ * @iter: The iterator to extract from
+ * @len: The amount of iterator to copy
+ * @sgtable: The scatterlist table to fill in
+ * @pages_pinned: On return set to true if pages were pinned
+ *
+ * Extract the page fragments from the given amount of the source iterator and
+ * build up a scatterlist that refers to all of those bits.
+ *
+ * The pages referred to by UBUF- and IOVEC-class iterators are extracted and
+ * pinned; BVEC-, KVEC- and XARRAY-class are extracted but aren't pinned; PIPE-
+ * and DISCARD-class are not supported.
+ */
+ssize_t extract_iter_to_sg(struct iov_iter *iter, size_t len,
+			   struct sg_table *sgtable, bool *pages_pinned)
+{
+	struct scatterlist *sg = NULL;
+	unsigned int sg_max;
+	ssize_t ret;
+	size_t sg_size;
+
+	switch (iov_iter_type(iter)) {
+	case ITER_DISCARD:
+	case ITER_PIPE:
+		WARN_ON_ONCE(1);
+		return -EIO;
+	case ITER_UBUF:
+	case ITER_IOVEC:
+		*pages_pinned = true;
+		break;
+	case ITER_BVEC:
+	case ITER_KVEC:
+	case ITER_XARRAY:
+		*pages_pinned = false;
+		break;
+	}
+
+	sg_max = iov_iter_npages(iter, INT_MAX);
+	sg_size = array_size(sg_max, sizeof(*sg));
+	sg = kvzalloc(sg_size, GFP_KERNEL);
+	if (!sg)
+		return -ENOMEM;
+
+	sgtable->sgl = sg;
+	sgtable->nents = 0;
+	sgtable->orig_nents = 0;
+	ret = iov_iter_extract_pages(iter, sgtable, sg_max, len,
+				     EXTRACT_TO_SGLIST);
+	if (ret < 0)
+		kvfree(sg);
+	return ret;
+}
+EXPORT_SYMBOL(extract_iter_to_sg);
+
+/**
+ * extract_iter_to_rdma - Extract the pages from an iterator into an rdma SGE list
+ * @iter: The iterator to extract from
+ * @len: The amount of iterator to copy
+ * @device: The RDMA device
+ * @local_dma_lkey: The local DMA key to set on each SGE
+ * @direction: The DMA direction
+ * @sge: The SGE array to fill
+ * @max_sge: The maximum number of elements in @sge
+ * @nr_sge: On return set to the number of SGEs used
+ *
+ * Extract the page fragments from the given amount of the source iterator and
+ * build up an RDMA SGE list that refers to all of those bits.
+ *
+ * Only BVEC-, KVEC- and XARRAY-class iterators are supported and the extracted
+ * pages aren't pinned; UBUF-, IOVEC-, PIPE- and DISCARD-class are not
+ * supported.
+ */
+ssize_t extract_iter_to_rdma(struct iov_iter *iter, size_t len,
+			     struct ib_device *device, u32 local_dma_lkey,
+			     enum dma_data_direction direction,
+			     struct ib_sge *sge, unsigned int max_sge,
+			     unsigned int *nr_sge)
+{
+	struct extract_to_rdma rdma = {
+		.device		= device,
+		.local_dma_lkey	= local_dma_lkey,
+		.direction	= direction,
+		.sge		= sge,
+		.nr_sge		= nr_sge,
+	};
+
+	switch (iov_iter_type(iter)) {
+	case ITER_DISCARD:
+	case ITER_PIPE:
+		WARN_ON_ONCE(1);
+		return -EIO;
+	case ITER_UBUF:
+	case ITER_IOVEC:
+		WARN_ON_ONCE(1); /* Can't pin pages; extract to a bvec first. */
+		return -EIO;
+	case ITER_BVEC:
+	case ITER_KVEC:
+	case ITER_XARRAY:
+		break;
+	}
+
+	BUG(); // TODO: Implement handling of errors from ib_dma_map_page().
+	return iov_iter_extract_pages(iter, &rdma, max_sge, len,
+				      EXTRACT_TO_RDMA);
+}
+EXPORT_SYMBOL(extract_iter_to_rdma);
+
+/**
+ * iov_iter_flush_dcache - Flush the dcache extracted into an iterator
+ * @iter: The destination iterator
+ * @cleanup: The cleanup record produced by extract_iter_to_iter()
+ *
+ * Use the information stored in an extraction cleanup record to flush the
+ * cache.
+ */
+void iov_iter_flush_dcache(struct iov_iter *iter,
+			   struct iov_iter_cleanup *cleanup)
+{
+	int i;
+
+	switch (cleanup->type) {
+	case ITER_BVEC:
+		for (i = 0; i < cleanup->nr_segs; i++)
+			flush_dcache_page(cleanup->bv[i].bv_page);
+		break;
+	case ITER_KVEC:
+		BUG(); // TODO: Make this work.  Using bv is wrong.
+		//for (i = 0; i < cleanup->nr_segs; i++)
+		//	flush_dcache_page(cleanup->bv[i].bv_page);
+		break;
+	case ITER_XARRAY: {
+		struct page *page;
+		loff_t pos = iter->xarray_start + iter->iov_offset;
+		pgoff_t index = pos >> PAGE_SHIFT;
+		unsigned int offset = pos & ~PAGE_MASK;
+		int nr_pages = DIV_ROUND_UP(offset + iov_iter_count(iter), PAGE_SIZE);
+
+		XA_STATE(xas, iter->xarray, index);
+
+		rcu_read_lock();
+		for (page = xas_load(&xas); page; page = xas_next(&xas)) {
+			if (xas_retry(&xas, page))
+				continue;
+			if (unlikely(page != xas_reload(&xas))) {
+				xas_reset(&xas);
+				continue;
+			}
+
+			flush_dcache_page(find_subpage(page, xas.xa_index));
+			if (--nr_pages <= 0)
+				break;
+		}
+		rcu_read_unlock();
+		break;
+	}
+	default:
+		BUG();
+	}
+}
+EXPORT_SYMBOL(iov_iter_flush_dcache);
+
+/**
+ * iov_iter_clean_up - Clean up segment list and unpin pages
+ * @cleanup: The cleanup information from extract_iter_to_iter()
+ */
+void iov_iter_clean_up(struct iov_iter_cleanup *cleanup)
+{
+	unsigned int i;
+
+	if (!cleanup->bv)
+		return;
+	switch (cleanup->type) {
+	case ITER_BVEC:
+		if (cleanup->pages_pinned) {
+			for (i = 0; i < cleanup->nr_segs; i++)
+				if (cleanup->bv[i].bv_page)
+					put_page(cleanup->bv[i].bv_page);
+		}
+		kvfree(cleanup->bv);
+		cleanup->bv = NULL;
+		break;
+	case ITER_KVEC:
+		kvfree(cleanup->kv);
+		cleanup->kv = NULL;
+		break;
+	default:
+		break;
+	}
+}
