Re: [PATCH net-next v3 11/13] net: replace page_frag with page_frag_cache

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, 8 May 2024, Yunsheng Lin wrote:

Use the newly introduced prepare/probe/commit API to
replace page_frag with page_frag_cache for sk_page_frag().

CC: Alexander Duyck <alexander.duyck@xxxxxxxxx>
Signed-off-by: Yunsheng Lin <linyunsheng@xxxxxxxxxx>
---
.../chelsio/inline_crypto/chtls/chtls.h       |   3 -
.../chelsio/inline_crypto/chtls/chtls_io.c    | 100 ++++---------
.../chelsio/inline_crypto/chtls/chtls_main.c  |   3 -
drivers/net/tun.c                             |  28 ++--
include/linux/sched.h                         |   4 +-
include/net/sock.h                            |  14 +-
kernel/exit.c                                 |   3 +-
kernel/fork.c                                 |   3 +-
net/core/skbuff.c                             |  32 ++--
net/core/skmsg.c                              |  22 +--
net/core/sock.c                               |  46 ++++--
net/ipv4/ip_output.c                          |  33 +++--
net/ipv4/tcp.c                                |  35 ++---
net/ipv4/tcp_output.c                         |  28 ++--
net/ipv6/ip6_output.c                         |  33 +++--
net/kcm/kcmsock.c                             |  30 ++--
net/mptcp/protocol.c                          |  70 +++++----
net/sched/em_meta.c                           |   2 +-
net/tls/tls_device.c                          | 139 ++++++++++--------
19 files changed, 331 insertions(+), 297 deletions(-)


<snip>

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index bb8f96f2b86f..ab844011d442 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -960,17 +960,18 @@ static bool mptcp_skb_can_collapse_to(u64 write_seq,
}

/* we can append data to the given data frag if:
- * - there is space available in the backing page_frag
- * - the data frag tail matches the current page_frag free offset
+ * - there is space available for the current page
+ * - the data frag tail matches the current page and offset
 * - the data frag end sequence number matches the current write seq
 */
static bool mptcp_frag_can_collapse_to(const struct mptcp_sock *msk,
-				       const struct page_frag *pfrag,
+				       const struct page *page,
+				       const unsigned int offset,
+				       const unsigned int size,

Hi Yunsheng -

Why add the 'size' parameter here? It's checked to be nonzero, but it can only be 0 if page is also NULL. In that case "page == df->page" will be false, so the function will return false even without checking 'size'.

Thanks,

Mat

				       const struct mptcp_data_frag *df)
{
-	return df && pfrag->page == df->page &&
-		pfrag->size - pfrag->offset > 0 &&
-		pfrag->offset == (df->offset + df->data_len) &&
+	return df && size && page == df->page &&
+		offset == (df->offset + df->data_len) &&
		df->data_seq + df->data_len == msk->write_seq;
}

@@ -1085,30 +1086,36 @@ static void mptcp_enter_memory_pressure(struct sock *sk)
/* ensure we get enough memory for the frag hdr, beyond some minimal amount of
 * data
 */
-static bool mptcp_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
+static struct page *mptcp_page_frag_alloc_prepare(struct sock *sk,
+						  struct page_frag_cache *pfrag,
+						  unsigned int *offset,
+						  unsigned int *size, void **va)
{
-	if (likely(skb_page_frag_refill(32U + sizeof(struct mptcp_data_frag),
-					pfrag, sk->sk_allocation)))
-		return true;
+	struct page *page;
+
+	page = page_frag_alloc_prepare(pfrag, offset, size, va,
+				       sk->sk_allocation);
+	if (likely(page))
+		return page;

	mptcp_enter_memory_pressure(sk);
-	return false;
+	return NULL;
}

static struct mptcp_data_frag *
-mptcp_carve_data_frag(const struct mptcp_sock *msk, struct page_frag *pfrag,
-		      int orig_offset)
+mptcp_carve_data_frag(const struct mptcp_sock *msk, struct page *page,
+		      unsigned int orig_offset)
{
	int offset = ALIGN(orig_offset, sizeof(long));
	struct mptcp_data_frag *dfrag;

-	dfrag = (struct mptcp_data_frag *)(page_to_virt(pfrag->page) + offset);
+	dfrag = (struct mptcp_data_frag *)(page_to_virt(page) + offset);
	dfrag->data_len = 0;
	dfrag->data_seq = msk->write_seq;
	dfrag->overhead = offset - orig_offset + sizeof(struct mptcp_data_frag);
	dfrag->offset = offset + sizeof(struct mptcp_data_frag);
	dfrag->already_sent = 0;
-	dfrag->page = pfrag->page;
+	dfrag->page = page;

	return dfrag;
}
@@ -1793,7 +1800,7 @@ static u32 mptcp_send_limit(const struct sock *sk)
static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
	struct mptcp_sock *msk = mptcp_sk(sk);
-	struct page_frag *pfrag;
+	struct page_frag_cache *pfrag;
	size_t copied = 0;
	int ret = 0;
	long timeo;
@@ -1832,9 +1839,12 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
	while (msg_data_left(msg)) {
		int total_ts, frag_truesize = 0;
		struct mptcp_data_frag *dfrag;
-		bool dfrag_collapsed;
-		size_t psize, offset;
+		bool dfrag_collapsed = false;
+		unsigned int offset, size;
+		struct page *page;
+		size_t psize;
		u32 copy_limit;
+		void *va;

		/* ensure fitting the notsent_lowat() constraint */
		copy_limit = mptcp_send_limit(sk);
@@ -1845,21 +1855,26 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
		 * page allocator
		 */
		dfrag = mptcp_pending_tail(sk);
-		dfrag_collapsed = mptcp_frag_can_collapse_to(msk, pfrag, dfrag);
+		page = page_frag_alloc_probe(pfrag, &offset, &size, &va);
+		dfrag_collapsed = mptcp_frag_can_collapse_to(msk, page, offset,
+							     size, dfrag);
		if (!dfrag_collapsed) {
-			if (!mptcp_page_frag_refill(sk, pfrag))
+			size = 32U + sizeof(struct mptcp_data_frag);
+			page = mptcp_page_frag_alloc_prepare(sk, pfrag, &offset,
+							     &size, &va);
+			if (!page)
				goto wait_for_memory;

-			dfrag = mptcp_carve_data_frag(msk, pfrag, pfrag->offset);
+			dfrag = mptcp_carve_data_frag(msk, page, offset);
			frag_truesize = dfrag->overhead;
+			va += dfrag->overhead;
		}

		/* we do not bound vs wspace, to allow a single packet.
		 * memory accounting will prevent execessive memory usage
		 * anyway
		 */
-		offset = dfrag->offset + dfrag->data_len;
-		psize = pfrag->size - offset;
+		psize = size - frag_truesize;
		psize = min_t(size_t, psize, msg_data_left(msg));
		psize = min_t(size_t, psize, copy_limit);
		total_ts = psize + frag_truesize;
@@ -1867,8 +1882,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
		if (!sk_wmem_schedule(sk, total_ts))
			goto wait_for_memory;

-		ret = do_copy_data_nocache(sk, psize, &msg->msg_iter,
-					   page_address(dfrag->page) + offset);
+		ret = do_copy_data_nocache(sk, psize, &msg->msg_iter, va);
		if (ret)
			goto do_error;

@@ -1877,7 +1891,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
		copied += psize;
		dfrag->data_len += psize;
		frag_truesize += psize;
-		pfrag->offset += frag_truesize;
		WRITE_ONCE(msk->write_seq, msk->write_seq + psize);

		/* charge data on mptcp pending queue to the msk socket
@@ -1885,11 +1898,14 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
		 */
		sk_wmem_queued_add(sk, frag_truesize);
		if (!dfrag_collapsed) {
-			get_page(dfrag->page);
+			page_frag_alloc_commit(pfrag, frag_truesize);
			list_add_tail(&dfrag->list, &msk->rtx_queue);
			if (!msk->first_pending)
				WRITE_ONCE(msk->first_pending, dfrag);
+		} else {
+			page_frag_alloc_commit_noref(pfrag, frag_truesize);
		}
+
		pr_debug("msk=%p dfrag at seq=%llu len=%u sent=%u new=%d", msk,
			 dfrag->data_seq, dfrag->data_len, dfrag->already_sent,
			 !dfrag_collapsed);





[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux