On Fri, Feb 25, 2022 at 10:08:04AM -0800, Ivan Babrou wrote:
> On Fri, Feb 25, 2022 at 10:03 AM Michal Koutný <mkoutny@xxxxxxxx> wrote:
> > BTW how many levels deep is the affected memory cgroup hierarchy (where
> > the workingset_refault happens)? (Self answer: Probably less than
> > nr_cpus*MEMCG_BATCH, so not relevant.)
> 
> It's /system.slice/thingy.service (a regular systemd unit on
> cgroupv2), so not very deep.
Hi Ivan & Daniel,

Can you please try the following patch, but without the async patch, and
let us know if there is any difference in the regression you are observing?

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 0abbd685703b..59014d20149a 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -999,6 +999,7 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
 }
 
 void mem_cgroup_flush_stats(void);
+void mem_cgroup_flush_stats_memcg(struct mem_cgroup *memcg);
 
 void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
 			      int val);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 36e9f38c919d..ae59bfc8788a 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -642,14 +642,14 @@ static inline void memcg_rstat_updated(struct mem_cgroup *memcg, int val)
 	}
 }
 
-static void __mem_cgroup_flush_stats(void)
+static void __mem_cgroup_flush_stats(struct mem_cgroup *memcg)
 {
 	unsigned long flag;
 
 	if (!spin_trylock_irqsave(&stats_flush_lock, flag))
 		return;
 
-	cgroup_rstat_flush_irqsafe(root_mem_cgroup->css.cgroup);
+	cgroup_rstat_flush_irqsafe(memcg->css.cgroup);
 	atomic_set(&stats_flush_threshold, 0);
 	spin_unlock_irqrestore(&stats_flush_lock, flag);
 }
@@ -657,12 +657,18 @@ static void __mem_cgroup_flush_stats(void)
 
 void mem_cgroup_flush_stats(void)
 {
 	if (atomic_read(&stats_flush_threshold) > num_online_cpus())
-		__mem_cgroup_flush_stats();
+		__mem_cgroup_flush_stats(root_mem_cgroup);
+}
+
+void mem_cgroup_flush_stats_memcg(struct mem_cgroup *memcg)
+{
+	if (atomic_read(&stats_flush_threshold) > num_online_cpus())
+		__mem_cgroup_flush_stats(memcg);
 }
 
 static void flush_memcg_stats_dwork(struct work_struct *w)
 {
-	__mem_cgroup_flush_stats();
+	__mem_cgroup_flush_stats(root_mem_cgroup);
 	queue_delayed_work(system_unbound_wq, &stats_flush_dwork, 2UL*HZ);
 }
diff --git a/mm/workingset.c b/mm/workingset.c
index 8c03afe1d67c..9b9ebc5e5110 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -354,7 +354,7 @@ void workingset_refault(struct folio *folio, void *shadow)
 
 	mod_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + file, nr);
 
-	mem_cgroup_flush_stats();
+	mem_cgroup_flush_stats_memcg(eviction_memcg);
 	/*
 	 * Compare the distance to the existing workingset size. We
 	 * don't activate pages that couldn't stay resident even if
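
To make it easier to see why scoping the flush helps, here is a tiny
standalone userspace model (my sketch, not kernel code: struct node,
flush_one(), flush_subtree() and the tree layout are all made up for
illustration) of flushing one subtree versus flushing from the root.
Each node keeps an unflushed delta, and a flush has to visit every node
under the subtree root it is given:

/*
 * Standalone model (not kernel code) of the targeted flush above.
 * All names here are illustrative; the point is only to show how
 * much of the tree each kind of flush has to visit.
 */
#include <stdio.h>

#define NR_NODES 7

struct node {
	int parent;	/* index of the parent node, -1 for the root */
	long pending;	/* unflushed per-node delta */
	long value;	/* aggregated stat, valid after a flush */
};

/* node 0 is the root; 1 and 2 are its children; 3-6 are leaves */
static struct node tree[NR_NODES] = {
	{ -1, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
	{ 1, 0, 0 }, { 1, 0, 0 }, { 2, 0, 0 }, { 2, 0, 0 },
};

static int nodes_visited;

/* Is 'node' inside the subtree rooted at 'top'? */
static int in_subtree(int node, int top)
{
	while (node != -1 && node != top)
		node = tree[node].parent;
	return node == top;
}

/* Push one node's pending delta up through its ancestors. */
static void flush_one(int node)
{
	long delta = tree[node].pending;

	tree[node].pending = 0;
	for (; node != -1; node = tree[node].parent)
		tree[node].value += delta;
}

/* Flush every node under 'top', counting how many we had to visit. */
static void flush_subtree(int top)
{
	nodes_visited = 0;
	for (int i = 0; i < NR_NODES; i++) {
		if (in_subtree(i, top)) {
			flush_one(i);
			nodes_visited++;
		}
	}
}

int main(void)
{
	tree[3].pending = 10;	/* a refault charged to node 3 */

	flush_subtree(1);	/* targeted: only node 1's subtree */
	printf("targeted flush visited %d nodes, node 1 value %ld\n",
	       nodes_visited, tree[1].value);

	flush_subtree(0);	/* root flush: walks the whole tree */
	printf("root flush visited %d nodes\n", nodes_visited);
	return 0;
}

Compiled with gcc, this prints "targeted flush visited 3 nodes, node 1
value 10" followed by "root flush visited 7 nodes". In the real patch
the saved work is the rstat walk over every cgroup on the system versus
only the cgroups under the refaulting service, while the existing
stats_flush_threshold > num_online_cpus() check still ratelimits how
often any flush happens at all.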