On Sat, Mar 24, 2018 at 09:08:59AM -0700, Tejun Heo wrote:
> @@ -91,6 +91,9 @@ struct mem_cgroup_stat_cpu {
>  	unsigned long events[MEMCG_NR_EVENTS];
>  	unsigned long nr_page_events;
>  	unsigned long targets[MEM_CGROUP_NTARGETS];
> +
> +	/* for cgroup rstat delta calculation */
> +	unsigned long last_events[MEMCG_NR_EVENTS];
>  };
>
>  struct mem_cgroup_reclaim_iter {
> @@ -233,7 +236,11 @@ struct mem_cgroup {
>
>  	struct mem_cgroup_stat_cpu __percpu *stat_cpu;
>  	atomic_long_t stat[MEMCG_NR_STAT];
> -	atomic_long_t events[MEMCG_NR_EVENTS];
> +
> +	/* events is managed by cgroup rstat */
> +	unsigned long long events[MEMCG_NR_EVENTS]; /* local */
> +	unsigned long long tree_events[MEMCG_NR_EVENTS]; /* subtree */
> +	unsigned long long pending_events[MEMCG_NR_EVENTS];/* propagation */

The lazy updates are neat, but I'm a little concerned about the memory footprint. On a 64-cpu machine, for example, this adds close to 9000 words to struct mem_cgroup. And we really only need the accuracy for the 4 cgroup items in memory.events, not all VM events and stats. Why not restrict the patch to those? It would also get rid of the weird sharing between VM and cgroup enums.