Re: [PATCH v2 01/21] scatterlist: Introduce sg_map helper functions

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 26/04/17 01:44 AM, Christoph Hellwig wrote:
> I think we'll at least need a draft of those to make sense of these
> patches.  Otherwise they just look very clumsy.

Ok, what follows is a draft patch attempting to show where I'm thinking
of going with this. Obviously it will not compile because it assumes
the users throughout the kernel are a bit different than they are today.
Notably, there is no sg_page anymore.

There's also likely a ton of issues and arguments to have over a bunch
of the specifics below and I'd expect the concept to evolve more
as cleanup occurs. This itself is an evolution of the draft I posted
replying to you in my last RFC thread.

Also, before any of this is truly useful to us, pfn_t would have to
infect a few other places in the kernel.

Thanks,

Logan


diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index fad170b..85ef928 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -6,13 +6,14 @@
 #include <linux/bug.h>
 #include <linux/mm.h>
 #include <linux/highmem.h>
+#include <linux/pfn_t.h>
 #include <asm/io.h>

 struct scatterlist {
 #ifdef CONFIG_DEBUG_SG
 	unsigned long	sg_magic;
 #endif
-	unsigned long	page_link;
+	pfn_t  		pfn;
 	unsigned int	offset;
 	unsigned int	length;
 	dma_addr_t	dma_address;
@@ -60,15 +61,68 @@ struct sg_table {

 #define SG_MAGIC	0x87654321

-/*
- * We overload the LSB of the page pointer to indicate whether it's
- * a valid sg entry, or whether it points to the start of a new
scatterlist.
- * Those low bits are there for everyone! (thanks mason :-)
- */
-#define sg_is_chain(sg)		((sg)->page_link & 0x01)
-#define sg_is_last(sg)		((sg)->page_link & 0x02)
-#define sg_chain_ptr(sg)	\
-	((struct scatterlist *) ((sg)->page_link & ~0x03))
+static inline bool sg_is_chain(struct scatterlist *sg)
+{
+	return sg->pfn.val & PFN_SG_CHAIN;
+}
+
+static inline bool sg_is_last(struct scatterlist *sg)
+{
+	return sg->pfn.val & PFN_SG_LAST;
+}
+
+static inline struct scatterlist *sg_chain_ptr(struct scatterlist *sg)
+{
+	unsigned long sgl = pfn_t_to_pfn(sg->pfn);
+	return (struct scatterlist *)(sgl << PAGE_SHIFT);
+}
+
+static inline bool sg_is_iomem(struct scatterlist *sg)
+{
+	return pfn_t_is_iomem(sg->pfn);
+}
+
+/**
+ * sg_assign_pfn - Assign a given pfn_t to an SG entry
+ * @sg:		    SG entry
+ * @pfn:	    The pfn
+ *
+ * Description:
+ *   Assign a pfn to sg entry. Also see sg_set_pfn(), the most commonly
used
+ *   variant.w
+ *
+ **/
+static inline void sg_assign_pfn(struct scatterlist *sg, pfn_t pfn)
+{
+#ifdef CONFIG_DEBUG_SG
+	BUG_ON(sg->sg_magic != SG_MAGIC);
+	BUG_ON(sg_is_chain(sg));
+	BUG_ON(pfn.val & (PFN_SG_CHAIN | PFN_SG_LAST));
+#endif
+
+	sg->pfn = pfn;
+}
+
+/**
+ * sg_set_pfn - Set sg entry to point at given pfn
+ * @sg:		 SG entry
+ * @pfn:	 The page
+ * @len:	 Length of data
+ * @offset:	 Offset into page
+ *
+ * Description:
+ *   Use this function to set an sg entry pointing at a pfn, never assign
+ *   the page directly. We encode sg table information in the lower bits
+ *   of the page pointer. See sg_pfn_t for looking up the pfn_t belonging
+ *   to an sg entry.
+ **/
+static inline void sg_set_pfn(struct scatterlist *sg, pfn_t pfn,
+			      unsigned int len, unsigned int offset)
+{
+	sg_assign_pfn(sg, pfn);
+	sg->offset = offset;
+	sg->length = len;
+}

 /**
  * sg_assign_page - Assign a given page to an SG entry
@@ -82,18 +136,13 @@ struct sg_table {
  **/
 static inline void sg_assign_page(struct scatterlist *sg, struct page
*page)
 {
-	unsigned long page_link = sg->page_link & 0x3;
+	if (!page) {
+		pfn_t null_pfn = {0};
+		sg_assign_pfn(sg, null_pfn);
+		return;
+	}

-	/*
-	 * In order for the low bit stealing approach to work, pages
-	 * must be aligned at a 32-bit boundary as a minimum.
-	 */
-	BUG_ON((unsigned long) page & 0x03);
-#ifdef CONFIG_DEBUG_SG
-	BUG_ON(sg->sg_magic != SG_MAGIC);
-	BUG_ON(sg_is_chain(sg));
-#endif
-	sg->page_link = page_link | (unsigned long) page;
+	sg_assign_pfn(sg, page_to_pfn_t(page));
 }

 /**
@@ -106,8 +155,7 @@ static inline void sg_assign_page(struct scatterlist
*sg, struct page *page)
  * Description:
  *   Use this function to set an sg entry pointing at a page, never assign
  *   the page directly. We encode sg table information in the lower bits
- *   of the page pointer. See sg_page() for looking up the page belonging
- *   to an sg entry.
+ *   of the page pointer.
  *
  **/
 static inline void sg_set_page(struct scatterlist *sg, struct page *page,
@@ -118,13 +166,53 @@ static inline void sg_set_page(struct scatterlist
*sg, struct page *page,
 	sg->length = len;
 }

-static inline struct page *sg_page(struct scatterlist *sg)
+/**
+ * sg_pfn_t - Return the pfn_t for the sg
+ * @sg:		 SG entry
+ *
+ **/
+static inline pfn_t sg_pfn_t(struct scatterlist *sg)
 {
 #ifdef CONFIG_DEBUG_SG
 	BUG_ON(sg->sg_magic != SG_MAGIC);
 	BUG_ON(sg_is_chain(sg));
 #endif
-	return (struct page *)((sg)->page_link & ~0x3);
+
+	return sg->pfn;
+}
+
+/**
+ * sg_to_mappable_page - Try to return a struct page safe for general
+ *	use in the kernel
+ * @sg:		 SG entry
+ * @page:	 A pointer to the returned page
+ *
+ * Description:
+ *   If possible, return a mappable page that's safe for use around the
+ *   kernel. Should only be used in legacy situations. sg_pfn_t() is a
+ *   better choice for new code. This is deliberately more awkward than
+ *   the old sg_page to enforce the __must_check rule and discourage future
+ *   use.
+ *
+ *   An example where this is required is in nvme-fabrics: a page from an
+ *   sgl is placed into a bio. This function would be required until we can
+ *   convert bios to use pfn_t as well. Similar issues with skbs, etc.
+ **/
+static inline __must_check int sg_to_mappable_page(struct scatterlist *sg,
+						   struct page **ret)
+{
+	struct page *pg;
+
+	if (unlikely(sg_is_iomem(sg)))
+		return -EFAULT;
+
+	pg = pfn_t_to_page(sg->pfn);
+	if (unlikely(!pg))
+		return -EFAULT;
+
+	*ret = pg;
+
+	return 0;
 }

 #define SG_KMAP		     (1 << 0)	/* create a mapping with kmap */
@@ -167,8 +255,19 @@ static inline void *sg_map(struct scatterlist *sg,
size_t offset, int flags)
 	unsigned int pg_off;
 	void *ret;

+	if (unlikely(sg_is_iomem(sg))) {
+		ret = ERR_PTR(-EFAULT);
+		goto out;
+	}
+
+	pg = pfn_t_to_page(sg->pfn);
+	if (unlikely(!pg)) {
+		ret = ERR_PTR(-EFAULT);
+		goto out;
+	}
+
 	offset += sg->offset;
-	pg = nth_page(sg_page(sg), offset >> PAGE_SHIFT);
+	pg = nth_page(pg, offset >> PAGE_SHIFT);
 	pg_off = offset_in_page(offset);

 	if (flags & SG_KMAP_ATOMIC)
@@ -178,12 +277,7 @@ static inline void *sg_map(struct scatterlist *sg,
size_t offset, int flags)
 	else
 		ret = ERR_PTR(-EINVAL);

-	/*
-	 * In theory, this can't happen yet. Once we start adding
-	 * unmapable memory, it also shouldn't happen unless developers
-	 * start putting unmappable struct pages in sgls and passing
-	 * it to code that doesn't support it.
-	 */
+out:
 	BUG_ON(flags & SG_MAP_MUST_NOT_FAIL && IS_ERR(ret));

 	return ret;
@@ -202,9 +296,15 @@ static inline void *sg_map(struct scatterlist *sg,
size_t offset, int flags)
 static inline void sg_unmap(struct scatterlist *sg, void *addr,
 			    size_t offset, int flags)
 {
-	struct page *pg = nth_page(sg_page(sg), offset >> PAGE_SHIFT);
+	struct page *pg;
 	unsigned int pg_off = offset_in_page(offset);

+	pg = pfn_t_to_page(sg->pfn);
+	if (unlikely(!pg))
+		return;
+
+	pg = nth_page(pg, offset >> PAGE_SHIFT);
+
 	if (flags & SG_KMAP_ATOMIC)
 		kunmap_atomic(addr - sg->offset - pg_off);
 	else if (flags & SG_KMAP)
@@ -246,17 +346,18 @@ static inline void sg_set_buf(struct scatterlist
*sg, const void *buf,
 static inline void sg_chain(struct scatterlist *prv, unsigned int
prv_nents,
 			    struct scatterlist *sgl)
 {
+	pfn_t pfn;
+	unsigned long _sgl = (unsigned long) sgl;
+
 	/*
 	 * offset and length are unused for chain entry.  Clear them.
 	 */
 	prv[prv_nents - 1].offset = 0;
 	prv[prv_nents - 1].length = 0;

-	/*
-	 * Set lowest bit to indicate a link pointer, and make sure to clear
-	 * the termination bit if it happens to be set.
-	 */
-	prv[prv_nents - 1].page_link = ((unsigned long) sgl | 0x01) & ~0x02;
+	BUG_ON(_sgl & PAGE_MASK);
+	pfn = __pfn_to_pfn_t(_sgl >> PAGE_SHIFT, PFN_SG_CHAIN);
+	prv[prv_nents - 1].pfn = pfn;
 }

 /**
@@ -276,8 +377,8 @@ static inline void sg_mark_end(struct scatterlist *sg)
 	/*
 	 * Set termination bit, clear potential chain bit
 	 */
-	sg->page_link |= 0x02;
-	sg->page_link &= ~0x01;
+	sg->pfn.val |= PFN_SG_LAST;
+	sg->pfn.val &= ~PFN_SG_CHAIN;
 }

 /**
@@ -293,7 +394,7 @@ static inline void sg_unmark_end(struct scatterlist *sg)
 #ifdef CONFIG_DEBUG_SG
 	BUG_ON(sg->sg_magic != SG_MAGIC);
 #endif
-	sg->page_link &= ~0x02;
+	sg->pfn.val &= ~PFN_SG_LAST;
 }

 /**
@@ -301,14 +402,13 @@ static inline void sg_unmark_end(struct
scatterlist *sg)
  * @sg:	     SG entry
  *
  * Description:
- *   This calls page_to_phys() on the page in this sg entry, and adds the
- *   sg offset. The caller must know that it is legal to call
page_to_phys()
- *   on the sg page.
+ *   This calls pfn_t_to_phys() on the pfn in this sg entry, and adds the
+ *   sg offset.
  *
  **/
 static inline dma_addr_t sg_phys(struct scatterlist *sg)
 {
-	return page_to_phys(sg_page(sg)) + sg->offset;
+	return pfn_t_to_phys(sg->pfn) + sg->offset;
 }

 /**
@@ -323,7 +423,12 @@ static inline dma_addr_t sg_phys(struct scatterlist
*sg)
  **/
 static inline void *sg_virt(struct scatterlist *sg)
 {
-	return page_address(sg_page(sg)) + sg->offset;
+	struct page *pg = pfn_t_to_page(sg->pfn);
+
+	BUG_ON(sg_is_iomem(sg));
+	BUG_ON(!pg);
+
+	return page_address(pg) + sg->offset;
 }

 int sg_nents(struct scatterlist *sg);
@@ -422,10 +527,18 @@ void __sg_page_iter_start(struct sg_page_iter *piter,
 /**
  * sg_page_iter_page - get the current page held by the page iterator
  * @piter:	page iterator holding the page
+ *
+ * This function will require some cleanup. Some users simply mark
+ * attributes of the pages which are fine, others actually map it and
+ * will require some saftey there.
  */
 static inline struct page *sg_page_iter_page(struct sg_page_iter *piter)
 {
-	return nth_page(sg_page(piter->sg), piter->sg_pgoffset);
+	struct page *pg = pfn_t_to_page(piter->sg->pfn);
+	if (!pg)
+		return NULL;
+
+	return nth_page(pg, piter->sg_pgoffset);
 }

 /**
@@ -468,11 +581,13 @@ static inline dma_addr_t
sg_page_iter_dma_address(struct sg_page_iter *piter)
 #define SG_MITER_ATOMIC		(1 << 0)	 /* use kmap_atomic */
 #define SG_MITER_TO_SG		(1 << 1)	/* flush back to phys on unmap */
 #define SG_MITER_FROM_SG	(1 << 2)	/* nop */
+#define SG_MITER_SUPPORTS_IOMEM (1 << 3)        /* iteratee supports
iomem */

 struct sg_mapping_iter {
 	/* the following three fields can be accessed directly */
 	struct page		*page;		/* currently mapped page */
 	void			*addr;		/* pointer to the mapped area */
+	void __iomem            *ioaddr;        /* pointer to iomem */
 	size_t			length;		/* length of the mapped area */
 	size_t			consumed;	/* number of consumed bytes */
 	struct sg_page_iter	piter;		/* page iterator */
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index c6cf822..2d1c58c 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -571,6 +571,8 @@ EXPORT_SYMBOL(sg_miter_skip);
  */
 bool sg_miter_next(struct sg_mapping_iter *miter)
 {
+	void *addr;
+
 	sg_miter_stop(miter);

 	/*
@@ -580,13 +582,25 @@ bool sg_miter_next(struct sg_mapping_iter *miter)
 	if (!sg_miter_get_next_page(miter))
 		return false;

+	if (sg_is_iomem(miter->piter.sg) &&
+	    !(miter->__flags & SG_MITER_SUPPORTS_IOMEM))
+		return false;
+
 	miter->page = sg_page_iter_page(&miter->piter);
 	miter->consumed = miter->length = miter->__remaining;

 	if (miter->__flags & SG_MITER_ATOMIC)
-		miter->addr = kmap_atomic(miter->page) + miter->__offset;
+		addr = kmap_atomic(miter->page) + miter->__offset;
 	else
-		miter->addr = kmap(miter->page) + miter->__offset;
+		addr = kmap(miter->page) + miter->__offset;
+
+	if (sg_is_iomem(miter->piter.sg)) {
+		miter->addr = NULL;
+		miter->ioaddr = (void * __iomem) addr;
+	} else {
+		miter->addr = addr;
+		miter->ioaddr = NULL;
+	}

 	return true;
 }
@@ -651,7 +665,7 @@ size_t sg_copy_buffer(struct scatterlist *sgl,
unsigned int nents, void *buf,
 {
 	unsigned int offset = 0;
 	struct sg_mapping_iter miter;
-	unsigned int sg_flags = SG_MITER_ATOMIC;
+	unsigned int sg_flags = SG_MITER_ATOMIC | SG_MITER_SUPPORTS_IOMEM;

 	if (to_buffer)
 		sg_flags |= SG_MITER_FROM_SG;
@@ -668,10 +682,17 @@ size_t sg_copy_buffer(struct scatterlist *sgl,
unsigned int nents, void *buf,

 		len = min(miter.length, buflen - offset);

-		if (to_buffer)
-			memcpy(buf + offset, miter.addr, len);
-		else
-			memcpy(miter.addr, buf + offset, len);
+		if (miter.addr) {
+			if (to_buffer)
+				memcpy(buf + offset, miter.addr, len);
+			else
+				memcpy(miter.addr, buf + offset, len);
+		} else if (miter.ioaddr) {
+			if (to_buffer)
+				memcpy_fromio(buf + offset, miter.addr, len);
+			else
+				memcpy_toio(miter.addr, buf + offset, len);
+		}

 		offset += len;
 	}
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux