[PATCH v6 34/34] net: [RFC][WIP] Make __zerocopy_sg_from_iter() correctly pin or leave pages unref'd

Make __zerocopy_sg_from_iter() call iov_iter_extract_pages() to get pages
that have been ref'd, pinned or left untouched, as appropriate to the
iterator type.  As this function only deals with source buffers, pinning
isn't an option, but leaving the pages unreferenced is.
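
For illustration, here is a minimal sketch of the extract-then-release
pattern the function is converted to.  It assumes the
iov_iter_extract_pages(), iov_iter_extract_mode() and page_put_unpin()
interfaces added earlier in this series; the function name and array size
below are made up for the example:

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/uio.h>

/* Sketch only: pull pages out of a source iterator, use them, then release
 * them according to the iterator's cleanup mode (FOLL_GET, FOLL_PIN or 0,
 * the last meaning the pages are left untouched).
 */
static ssize_t example_extract_then_release(struct iov_iter *from, size_t len)
{
	struct page *pages[8], **ppages = pages;
	unsigned int cleanup_mode = iov_iter_extract_mode(from, FOLL_SOURCE_BUF);
	size_t start;
	ssize_t copied;
	int i, n;

	copied = iov_iter_extract_pages(from, &ppages, len, ARRAY_SIZE(pages),
					FOLL_SOURCE_BUF, &start);
	if (copied < 0)
		return copied;

	/* ... hand the pages to the transmit path as source data ... */

	n = DIV_ROUND_UP(copied + start, PAGE_SIZE);
	for (i = 0; i < n; i++)
		page_put_unpin(pages[i], cleanup_mode);
	return copied;
}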

The way __zerocopy_sg_from_iter() merges fragments is also altered: a
fragment is now only merged into the previous one if the two have matching
cleanup modes.
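
Expressed as a predicate, the new merge test is roughly the following
(skb_frag_cleanup() records the FOLL_* cleanup mode a fragment was filled
with, as added earlier in the series; the helper name is illustrative only):

#include <linux/skbuff.h>

/* Sketch: a freshly extracted page may only be coalesced into the last
 * fragment if it is the same page, directly follows the data already there
 * and was obtained under the same cleanup mode.
 */
static bool example_can_coalesce(const skb_frag_t *last, struct page *head,
				 size_t start, unsigned int cleanup_mode)
{
	return head == skb_frag_page(last) &&
	       cleanup_mode == skb_frag_cleanup(last) &&
	       start == skb_frag_off(last) + skb_frag_size(last);
}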

An extra helper and wrapper, folio_put_unpin_sub() and page_put_unpin_sub(),
are added to allow multiple refs to be put or unpinned in one call.
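
A rough usage sketch of the batching these helpers enable, mirroring what
the datagram code below does (the function and variable names here are
illustrative):

#include <linux/mm.h>

/* Sketch: drop a batch of refs/pins accumulated against one compound page
 * head while coalescing fragments.  With a cleanup mode of 0 the call
 * leaves the page untouched.
 */
static void example_release_batch(struct page *last_head,
				  unsigned int cleanup_mode,
				  unsigned int refs)
{
	if (refs)
		page_put_unpin_sub(last_head, cleanup_mode, refs);
}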

Signed-off-by: David Howells <dhowells@xxxxxxxxxx>
cc: "David S. Miller" <davem@xxxxxxxxxxxxx>
cc: Eric Dumazet <edumazet@xxxxxxxxxx>
cc: Jakub Kicinski <kuba@xxxxxxxxxx>
cc: Paolo Abeni <pabeni@xxxxxxxxxx>
cc: netdev@xxxxxxxxxxxxxxx
---

 include/linux/mm.h  |    2 ++
 mm/gup.c            |   25 +++++++++++++++++++++++++
 net/core/datagram.c |   23 +++++++++++++----------
 3 files changed, 40 insertions(+), 10 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index f14edb192394..e3923b89c75e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1368,7 +1368,9 @@ static inline bool is_cow_mapping(vm_flags_t flags)
 #endif
 
 void folio_put_unpin(struct folio *folio, unsigned int flags);
+void folio_put_unpin_sub(struct folio *folio, unsigned int flags, unsigned int refs);
 void page_put_unpin(struct page *page, unsigned int flags);
+void page_put_unpin_sub(struct page *page, unsigned int flags, unsigned int refs);
 
 /*
  * The identification function is mainly used by the buddy allocator for
diff --git a/mm/gup.c b/mm/gup.c
index 3ee4b4c7e0cb..49dd27ba6c13 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -213,6 +213,31 @@ void page_put_unpin(struct page *page, unsigned int flags)
 }
 EXPORT_SYMBOL_GPL(page_put_unpin);
 
+/**
+ * folio_put_unpin_sub - Unpin/put a folio as appropriate
+ * @folio: The folio to release
+ * @flags: gup flags indicating the mode of release (FOLL_*)
+ * @refs: Number of refs/pins to drop
+ *
+ * Release a folio according to the flags.  If FOLL_GET is set, @refs refs are
+ * dropped; if FOLL_PIN is set, @refs pins are removed; otherwise the folio is
+ * left unaltered.
+ */
+void folio_put_unpin_sub(struct folio *folio, unsigned int flags,
+			 unsigned int refs)
+{
+	if (flags & (FOLL_GET | FOLL_PIN))
+		gup_put_folio(folio, refs, flags);
+}
+EXPORT_SYMBOL_GPL(folio_put_unpin_sub);
+
+void page_put_unpin_sub(struct page *page, unsigned int flags,
+			unsigned int refs)
+{
+	folio_put_unpin_sub(page_folio(page), flags, refs);
+}
+EXPORT_SYMBOL_GPL(page_put_unpin_sub);
+
 /**
  * try_grab_page() - elevate a page's refcount by a flag-dependent amount
  * @page:    pointer to page to be grabbed
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 122bfb144d32..63ea1f8817e0 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -614,6 +614,7 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
 			    struct sk_buff *skb, struct iov_iter *from,
 			    size_t length)
 {
+	unsigned int cleanup_mode = iov_iter_extract_mode(from, FOLL_SOURCE_BUF);
 	int frag;
 
 	if (msg && msg->msg_ubuf && msg->sg_from_iter)
@@ -622,7 +623,7 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
 	frag = skb_shinfo(skb)->nr_frags;
 
 	while (length && iov_iter_count(from)) {
-		struct page *pages[MAX_SKB_FRAGS];
+		struct page *pages[MAX_SKB_FRAGS], **ppages = pages;
 		struct page *last_head = NULL;
 		size_t start;
 		ssize_t copied;
@@ -632,9 +633,9 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
 		if (frag == MAX_SKB_FRAGS)
 			return -EMSGSIZE;
 
-		copied = iov_iter_get_pages(from, pages, length,
-					    MAX_SKB_FRAGS - frag, &start,
-					    FOLL_SOURCE_BUF);
+		copied = iov_iter_extract_pages(from, &ppages, length,
+						MAX_SKB_FRAGS - frag,
+						FOLL_SOURCE_BUF, &start);
 		if (copied < 0)
 			return -EFAULT;
 
@@ -662,12 +663,14 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
 				skb_frag_t *last = &skb_shinfo(skb)->frags[frag - 1];
 
 				if (head == skb_frag_page(last) &&
+				    cleanup_mode == skb_frag_cleanup(last) &&
 				    start == skb_frag_off(last) + skb_frag_size(last)) {
 					skb_frag_size_add(last, size);
 					/* We combined this page, we need to release
-					 * a reference. Since compound pages refcount
-					 * is shared among many pages, batch the refcount
-					 * adjustments to limit false sharing.
+					 * a reference or a pin.  Since compound pages
+					 * refcount is shared among many pages, batch
+					 * the refcount adjustments to limit false
+					 * sharing.
 					 */
 					last_head = head;
 					refs++;
@@ -675,14 +678,14 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
 				}
 			}
 			if (refs) {
-				page_ref_sub(last_head, refs);
+				page_put_unpin_sub(last_head, cleanup_mode, refs);
 				refs = 0;
 			}
 			skb_fill_page_desc_noacc(skb, frag++, head, start, size,
-						 FOLL_GET);
+						 cleanup_mode);
 		}
 		if (refs)
-			page_ref_sub(last_head, refs);
+			page_put_unpin_sub(last_head, cleanup_mode, refs);
 	}
 	return 0;
 }




