Re: Regression in workingset_refault latency on 5.15

On Wed, Mar 02, 2022 at 02:33:41PM -0800, Ivan Babrou wrote:
> On Tue, Mar 1, 2022 at 7:40 PM Ivan Babrou <ivan@xxxxxxxxxxxxxx> wrote:
> [...]

Please try the following patch. It keeps the periodic (2s) worker as the
main flush mechanism, and makes the hot paths (refault, reclaim and the
writeback stats) flush synchronously only when that worker appears to
have fallen behind:


diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index d9b8df5ef212..274e4ecff534 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1002,6 +1002,7 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
 }

 void mem_cgroup_flush_stats(void);
+void mem_cgroup_flush_stats_if_late(void);

void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
 			      int val);
@@ -1422,6 +1423,10 @@ static inline void mem_cgroup_flush_stats(void)
 {
 }

+static inline void mem_cgroup_flush_stats_if_late(void)
+{
+}
+
 static inline void __mod_memcg_lruvec_state(struct lruvec *lruvec,
 					    enum node_stat_item idx, int val)
 {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 32ba963ebf2e..00c924e25ce8 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -650,6 +650,9 @@ static DECLARE_DEFERRABLE_WORK(stats_flush_dwork, flush_memcg_stats_dwork);
 static DEFINE_SPINLOCK(stats_flush_lock);
 static DEFINE_PER_CPU(unsigned int, stats_updates);
 static atomic_t stats_flush_threshold = ATOMIC_INIT(0);
+static u64 flush_next_time;
+
+#define FLUSH_TIME (2UL*HZ)

 static inline void memcg_rstat_updated(struct mem_cgroup *memcg, int val)
 {
@@ -671,6 +674,7 @@ static void __mem_cgroup_flush_stats(void)
 	if (!spin_trylock_irqsave(&stats_flush_lock, flag))
 		return;

+	flush_next_time = jiffies_64 + 2*FLUSH_TIME;
 	cgroup_rstat_flush_irqsafe(root_mem_cgroup->css.cgroup);
 	atomic_set(&stats_flush_threshold, 0);
 	spin_unlock_irqrestore(&stats_flush_lock, flag);
@@ -682,10 +686,16 @@ void mem_cgroup_flush_stats(void)
 		__mem_cgroup_flush_stats();
 }

+void mem_cgroup_flush_stats_if_late(void)
+{
+	if (time_after64(jiffies_64, flush_next_time))
+		mem_cgroup_flush_stats();
+}
+
 static void flush_memcg_stats_dwork(struct work_struct *w)
 {
 	__mem_cgroup_flush_stats();
-	queue_delayed_work(system_unbound_wq, &stats_flush_dwork, 2UL*HZ);
+	queue_delayed_work(system_unbound_wq, &stats_flush_dwork, FLUSH_TIME);
 }

 /**
@@ -4518,7 +4528,7 @@ void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages,
 	struct mem_cgroup *memcg = mem_cgroup_from_css(wb->memcg_css);
 	struct mem_cgroup *parent;

-	mem_cgroup_flush_stats();
+	mem_cgroup_flush_stats_if_late();

 	*pdirty = memcg_page_state(memcg, NR_FILE_DIRTY);
 	*pwriteback = memcg_page_state(memcg, NR_WRITEBACK);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 74296c2d1fed..fd279621c3fc 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3041,7 +3041,7 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc)
 	 * Flush the memory cgroup stats, so that we read accurate per-memcg
 	 * lruvec stats for heuristics.
 	 */
-	mem_cgroup_flush_stats();
+	mem_cgroup_flush_stats_if_late();

 	memset(&sc->nr, 0, sizeof(sc->nr));

diff --git a/mm/workingset.c b/mm/workingset.c
index d5b81e4f4cbe..7edc52037e38 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -352,7 +352,7 @@ void workingset_refault(struct page *page, void *shadow)

 	inc_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + file);

-	mem_cgroup_flush_stats();
+	mem_cgroup_flush_stats_if_late();
 	/*
 	 * Compare the distance to the existing workingset size. We
 	 * don't activate pages that couldn't stay resident even if

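For anyone skimming the diff: each flush pushes flush_next_time two
periods (2*FLUSH_TIME) into the future, so as long as the periodic
worker keeps up, mem_cgroup_flush_stats_if_late() is a cheap compare
and the hot paths never take the expensive rstat flush. Below is a
minimal standalone sketch of that pattern, just to illustrate the
idea. It is userspace C, not kernel code; now_ms() and
do_expensive_flush() are made-up stand-ins for jiffies_64 and
cgroup_rstat_flush_irqsafe():

/*
 * Sketch of the "flush only if the periodic flusher is late" pattern.
 * Build with: cc -std=c11 -o flush flush.c
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

#define FLUSH_PERIOD_MS 2000ULL	/* analogous to FLUSH_TIME */

/* Deadline after which hot paths may flush themselves. */
static _Atomic uint64_t flush_next_time_ms;

/* Monotonic clock in milliseconds, standing in for jiffies_64. */
static uint64_t now_ms(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000 + ts.tv_nsec / 1000000;
}

/* Stand-in for the expensive flush (cgroup_rstat_flush_irqsafe()). */
static void do_expensive_flush(void)
{
	/*
	 * Push the deadline two periods out: hot paths only flush if
	 * the periodic worker has missed a whole period.
	 */
	atomic_store(&flush_next_time_ms, now_ms() + 2 * FLUSH_PERIOD_MS);
	puts("flushing stats");
}

/* Hot-path helper, analogous to mem_cgroup_flush_stats_if_late(). */
static void flush_if_late(void)
{
	if (now_ms() > atomic_load(&flush_next_time_ms))
		do_expensive_flush();
}

int main(void)
{
	flush_if_late();	/* deadline still 0: flushes */
	flush_if_late();	/* deadline now in the future: no-op */
	return 0;
}

One difference worth noting: the real patch compares with
time_after64(), which stays correct if jiffies_64 wraps; the plain '>'
above is only acceptable because CLOCK_MONOTONIC won't wrap in
practice.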


