The interruption caused by queueing work on nohz_full CPUs is undesirable
for certain applications. Fix this by not refreshing per-CPU stats of
nohz_full CPUs.

Signed-off-by: Marcelo Tosatti <mtosatti@xxxxxxxxxx>

---

Index: linux-vmstat-remote/mm/vmstat.c
===================================================================
--- linux-vmstat-remote.orig/mm/vmstat.c
+++ linux-vmstat-remote/mm/vmstat.c
@@ -1877,12 +1877,31 @@ static void refresh_vm_stats(struct work
 	refresh_cpu_vm_stats(true);
 }
 
+#ifdef CONFIG_NO_HZ_FULL
+static inline const cpumask_t *tickless_cpumask(void)
+{
+	return tick_nohz_full_mask;
+}
+#else
+static cpumask_t empty_cpumask;
+static inline const cpumask_t *tickless_cpumask(void)
+{
+	return &empty_cpumask;
+}
+#endif
+
 int vmstat_refresh(struct ctl_table *table, int write,
 		   void *buffer, size_t *lenp, loff_t *ppos)
 {
 	long val;
 	int err;
 	int i;
+	cpumask_var_t dstmask;
+
+	if (!alloc_cpumask_var(&dstmask, GFP_KERNEL))
+		return -ENOMEM;
+
+	cpumask_andnot(dstmask, cpu_possible_mask, tickless_cpumask());
 
 	/*
 	 * The regular update, every sysctl_stat_interval, may come later
@@ -1896,7 +1915,9 @@ int vmstat_refresh(struct ctl_table *tab
 	 * transiently negative values, report an error here if any of
 	 * the stats is negative, so we know to go looking for imbalance.
 	 */
-	err = schedule_on_each_cpu(refresh_vm_stats);
+	err = schedule_on_each_cpumask(refresh_vm_stats, dstmask);
+	free_cpumask_var(dstmask);
+
 	if (err)
 		return err;
 	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
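
The call site above switches to schedule_on_each_cpumask(), which is not defined in
this hunk and is presumably introduced elsewhere in the series. For context only, a
minimal sketch of what such a helper could look like follows, assuming it mirrors
schedule_on_each_cpu() in kernel/workqueue.c but queues the per-CPU work only on the
online CPUs present in the given mask; the actual helper may differ.

/*
 * Sketch only, not part of this patch: a possible schedule_on_each_cpumask(),
 * assumed to behave like schedule_on_each_cpu() restricted to @cpumask.
 */
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/percpu.h>
#include <linux/workqueue.h>

int schedule_on_each_cpumask(work_func_t func, const struct cpumask *cpumask)
{
	struct work_struct __percpu *works;
	int cpu;

	works = alloc_percpu(struct work_struct);
	if (!works)
		return -ENOMEM;

	cpus_read_lock();

	/* Queue one work item on each selected online CPU. */
	for_each_cpu_and(cpu, cpu_online_mask, cpumask) {
		struct work_struct *work = per_cpu_ptr(works, cpu);

		INIT_WORK(work, func);
		schedule_work_on(cpu, work);
	}

	/* Wait for every queued work item to finish before returning. */
	for_each_cpu_and(cpu, cpu_online_mask, cpumask)
		flush_work(per_cpu_ptr(works, cpu));

	cpus_read_unlock();
	free_percpu(works);
	return 0;
}

With the mask computed as cpu_possible_mask minus tick_nohz_full_mask, nohz_full CPUs
never have the refresh work queued on them, so vmstat_refresh trades slightly stale
per-CPU deltas on those CPUs for not interrupting their isolated workloads.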