On Fri, 10 Sep 2021 09:08:42 +0800 Feng Tang wrote:
> On Thu, Sep 09, 2021 at 05:43:40PM -0700, Shakeel Butt wrote:
> >
> > Feng, is it possible for you to run these benchmarks with the change
> > (basically changing MEMCG_CHARGE_BATCH to 128 in the if condition
> > before queue_work() inside __mod_memcg_lruvec_state())?
>
> When I checked this, I tried different changes, including this batch
> number change :), but it didn't recover the regression (the regression
> is slightly reduced to about 12%).
>
> Please check if my patch is what you want to test:
>
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 4d8c9af..a50a69a 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -682,7 +682,8 @@ void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
>
>  	/* Update lruvec */
>  	__this_cpu_add(pn->lruvec_stats_percpu->state[idx], val);
> -	if (!(__this_cpu_inc_return(stats_flush_threshold) % MEMCG_CHARGE_BATCH))
> +//	if (!(__this_cpu_inc_return(stats_flush_threshold) % MEMCG_CHARGE_BATCH))
> +	if (!(__this_cpu_inc_return(stats_flush_threshold) % 128))
>  		queue_work(system_unbound_wq, &stats_flush_work);
>  }

Hi Feng,

Would you please check whether avoiding re-queuing an already-queued work,
by adding and checking an atomic counter, helps fix the regression?

Hillf

--- x/mm/memcontrol.c
+++ y/mm/memcontrol.c
@@ -108,6 +108,7 @@ static void flush_memcg_stats_dwork(stru
 static DECLARE_DEFERRABLE_WORK(stats_flush_dwork, flush_memcg_stats_dwork);
 static void flush_memcg_stats_work(struct work_struct *w);
 static DECLARE_WORK(stats_flush_work, flush_memcg_stats_work);
+static atomic_t sfwork_queued;
 static DEFINE_PER_CPU(unsigned int, stats_flush_threshold);
 static DEFINE_SPINLOCK(stats_flush_lock);

@@ -660,8 +661,13 @@ void __mod_memcg_lruvec_state(struct lru

 	/* Update lruvec */
 	__this_cpu_add(pn->lruvec_stats_percpu->state[idx], val);
-	if (!(__this_cpu_inc_return(stats_flush_threshold) % MEMCG_CHARGE_BATCH))
-		queue_work(system_unbound_wq, &stats_flush_work);
+	if (!(__this_cpu_inc_return(stats_flush_threshold) %
+	      MEMCG_CHARGE_BATCH)) {
+		int queued = atomic_read(&sfwork_queued);
+
+		if (!queued && atomic_try_cmpxchg(&sfwork_queued, &queued, 1))
+			queue_work(system_unbound_wq, &stats_flush_work);
+	}
 }

 /**
@@ -5376,6 +5382,7 @@ static void flush_memcg_stats_dwork(stru
 static void flush_memcg_stats_work(struct work_struct *w)
 {
 	mem_cgroup_flush_stats();
+	atomic_dec(&sfwork_queued);
 }

 static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
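
[Editor's sketch, for readers following the pattern rather than the patch itself:
the idea above is "only the caller that wins the 0 -> 1 transition queues the
work; the worker re-arms the flag when it finishes". Below is a minimal
user-space sketch of that pattern using C11 atomics instead of the kernel's
atomic_t / atomic_try_cmpxchg() API. The names (work_queued, maybe_queue_flush,
do_flush) are invented for illustration, and the handler is called directly
rather than being queued on a workqueue.]

#include <stdatomic.h>
#include <stdio.h>

static atomic_int work_queued;		/* 0 = idle, 1 = queued or running */

static void do_flush(void)
{
	/* stand-in for mem_cgroup_flush_stats() */
	puts("flushing stats");
}

/* worker side: run the work, then allow it to be queued again */
static void flush_work_fn(void)
{
	do_flush();
	atomic_fetch_sub(&work_queued, 1);
}

/* caller side: only the caller that wins the 0 -> 1 cmpxchg "queues" */
static void maybe_queue_flush(void)
{
	int expected = 0;

	if (atomic_compare_exchange_strong(&work_queued, &expected, 1))
		flush_work_fn();	/* would be queue_work() in the kernel */
}

int main(void)
{
	maybe_queue_flush();	/* wins the cmpxchg, runs the flush */
	maybe_queue_flush();	/* flag is back to 0, so this one runs too */
	return 0;
}

[While the flag is 1, further callers skip queueing entirely, which is the
duplicate queue_work() traffic the patch above aims to eliminate.]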