[PATCH v4 net-next 2/2] net: introduce budget_squeeze to help us tune rx behavior

Jason Xing <kerneljasonxing@xxxxxxxxx> · Wed, 15 Mar 2023 17:20:41 +0800

From: Jason Xing <kernelxing@xxxxxxxxxxx>

In our production environment, there are hundreds of machines that often
hit the old time_squeeze limit, from which we cannot tell what exactly
causes such issues. The limit was hit anywhere from 400 to 2000 times per
second. Especially when users are running on a guest OS with veth policy
configured, it is relatively easy to hit the limit. After several tries
without this patch, I found that it is only the real time_squeeze, not
budget_squeeze, that hinders the receive process.

So when we encounter a related performance issue and get lost on how to
tune the budget limit and time limit in the net_rx_action() function, we
can count both of them separately to avoid the confusion.

Signed-off-by: Jason Xing <kernelxing@xxxxxxxxxxx>
Reviewed-by: Simon Horman <simon.horman@xxxxxxxxxxxx>
---
v4:
1) also avoid the inconsistency by caching variables suggested by Eric.
2) add more details about the real issue that happened on our servers,
as suggested by Jakub.

v3:
1) drop the comment suggested by Simon
Link: https://lore.kernel.org/lkml/20230314030532.9238-3-kerneljasonxing@xxxxxxxxx/

v2:
1) change the coding style suggested by Stephen and Simon
2) Keep the display of the old data (time_squeeze) untouched suggested
by Kui-Feng
Link: https://lore.kernel.org/lkml/20230311163614.92296-1-kerneljasonxing@xxxxxxxxx/
---
 include/linux/netdevice.h |  1 +
 net/core/dev.c            | 12 ++++++++----
 net/core/net-procfs.c     |  9 ++++++---
 3 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 6a14b7b11766..5736311a2133 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3157,6 +3157,7 @@ struct softnet_data {
 	/* stats */
 	unsigned int		processed;
 	unsigned int		time_squeeze;
+	unsigned int		budget_squeeze;
 #ifdef CONFIG_RPS
 	struct softnet_data	*rps_ipi_list;
 #endif
diff --git a/net/core/dev.c b/net/core/dev.c
index 253584777101..1518a366783b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6637,6 +6637,7 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
 	unsigned long time_limit = jiffies +
 		usecs_to_jiffies(READ_ONCE(netdev_budget_usecs));
 	int budget = READ_ONCE(netdev_budget);
+	bool done = false;
 	LIST_HEAD(list);
 	LIST_HEAD(repoll);
 
@@ -6644,7 +6645,7 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
 	list_splice_init(&sd->poll_list, &list);
 	local_irq_enable();
 
-	for (;;) {
+	while (!done) {
 		struct napi_struct *n;
 
 		skb_defer_free_flush(sd);
@@ -6662,10 +6663,13 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
 		 * Allow this to run for 2 jiffies since which will allow
 		 * an average latency of 1.5/HZ.
 		 */
-		if (unlikely(budget <= 0 ||
-			     time_after_eq(jiffies, time_limit))) {
+		if (unlikely(budget <= 0)) {
+			sd->budget_squeeze++;
+			done = true;
+		}
+		if (unlikely(time_after_eq(jiffies, time_limit))) {
 			sd->time_squeeze++;
-			break;
+			done = true;
 		}
 	}
 
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index 09f7ed1a04e8..b748e85952b0 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -158,6 +158,8 @@ static int softnet_seq_show(struct seq_file *seq, void *v)
 	struct softnet_data *sd = v;
 	u32 input_qlen = softnet_input_pkt_queue_len(sd);
 	u32 process_qlen = softnet_process_queue_len(sd);
+	unsigned int budget_sq = sd->budget_squeeze;
+	unsigned int time_sq = sd->time_squeeze;
 	unsigned int flow_limit_count = 0;
 
 #ifdef CONFIG_NET_FLOW_LIMIT
@@ -176,13 +178,14 @@ static int softnet_seq_show(struct seq_file *seq, void *v)
 	 */
 	seq_printf(seq,
 		   "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x "
-		   "%08x %08x\n",
-		   sd->processed, sd->dropped, sd->time_squeeze, 0,
+		   "%08x %08x %08x %08x\n",
+		   sd->processed, sd->dropped, time_sq + budget_sq, 0,
 		   0, 0, 0, 0, /* was fastroute */
 		   0,	/* was cpu_collision */
 		   sd->received_rps, flow_limit_count,
 		   input_qlen + process_qlen, (int)seq->index,
-		   input_qlen, process_qlen);
+		   input_qlen, process_qlen,
+		   time_sq, budget_sq);
 	return 0;
 }
 
-- 
2.37.3