On Wed, May 3, 2023 at 1:50 AM Michal Hocko <mhocko@xxxxxxxx> wrote: > > On Fri 28-04-23 13:24:06, Yosry Ahmed wrote: > > Commit c8713d0b2312 ("mm: memcontrol: dump memory.stat during cgroup > > OOM") made sure we dump all the stats in memory.stat during a cgroup > > OOM, but it also introduced a slight behavioral change. The code used to > > print the non-hierarchical v1 cgroup stats for the entire cgroup > > subtree, now it only prints the v2 cgroup stats for the cgroup under > > OOM. > > > > For cgroup v1 users, this introduces a few problems: > > (a) The non-hierarchical stats of the memcg under OOM are no longer > > shown. > > (b) A couple of v1-only stats (e.g. pgpgin, pgpgout) are no longer > > shown. > > (c) We show the list of cgroup v2 stats, even in cgroup v1. This list of > > stats is not tracked with v1 in mind. While most of the stats seem to be > > working on v1, there may be some stats that are not fully or correctly > > tracked. > > > > Although OOM log is not set in stone, we should not change it for no > > reason. When upgrading the kernel version to a version including > > commit c8713d0b2312 ("mm: memcontrol: dump memory.stat during cgroup > > OOM"), these behavioral changes are noticed in cgroup v1. > > > > The fix is simple. Commit c8713d0b2312 ("mm: memcontrol: dump memory.stat > > during cgroup OOM") separated stats formatting from stats display for > > v2, to reuse the stats formatting in the OOM logs. Do the same for v1. > > > > Move the v2 specific formatting from memory_stat_format() to > > memcg_stat_format(), add memcg1_stat_format() for v1, and make > > memory_stat_format() select between them based on cgroup version. > > Since memory_stat_show() now works for both v1 & v2, drop > > memcg_stat_show(). > > > > Signed-off-by: Yosry Ahmed <yosryahmed@xxxxxxxxxx> > > Acked-by: Michal Hocko <mhocko@xxxxxxxx> > Thanks Thanks Michal! > > > --- > > mm/memcontrol.c | 60 ++++++++++++++++++++++++++++--------------------- > > 1 file changed, 35 insertions(+), 25 deletions(-) > > > > diff --git a/mm/memcontrol.c b/mm/memcontrol.c > > index 5922940f92c9..2b492f8d540c 100644 > > --- a/mm/memcontrol.c > > +++ b/mm/memcontrol.c > > @@ -1551,7 +1551,7 @@ static inline unsigned long memcg_page_state_output(struct mem_cgroup *memcg, > > return memcg_page_state(memcg, item) * memcg_page_state_unit(item); > > } > > > > -static void memory_stat_format(struct mem_cgroup *memcg, struct seq_buf *s) > > +static void memcg_stat_format(struct mem_cgroup *memcg, struct seq_buf *s) > > { > > int i; > > > > @@ -1604,6 +1604,17 @@ static void memory_stat_format(struct mem_cgroup *memcg, struct seq_buf *s) > > WARN_ON_ONCE(seq_buf_has_overflowed(s)); > > } > > > > +static void memcg1_stat_format(struct mem_cgroup *memcg, struct seq_buf *s); > > + > > +static void memory_stat_format(struct mem_cgroup *memcg, struct seq_buf *s) > > +{ > > + if (cgroup_subsys_on_dfl(memory_cgrp_subsys)) > > + memcg_stat_format(memcg, s); > > + else > > + memcg1_stat_format(memcg, s); > > + WARN_ON_ONCE(seq_buf_has_overflowed(s)); > > +} > > + > > #define K(x) ((x) << (PAGE_SHIFT-10)) > > /** > > * mem_cgroup_print_oom_context: Print OOM information relevant to > > @@ -4078,9 +4089,8 @@ static const unsigned int memcg1_events[] = { > > PGMAJFAULT, > > }; > > > > -static int memcg_stat_show(struct seq_file *m, void *v) > > +static void memcg1_stat_format(struct mem_cgroup *memcg, struct seq_buf *s) > > { > > - struct mem_cgroup *memcg = mem_cgroup_from_seq(m); > > unsigned long memory, memsw; > > struct mem_cgroup *mi; > > unsigned int i; > > @@ -4095,18 +4105,18 @@ static int memcg_stat_show(struct seq_file *m, void *v) > > if (memcg1_stats[i] == MEMCG_SWAP && !do_memsw_account()) > > continue; > > nr = memcg_page_state_local(memcg, memcg1_stats[i]); > > - seq_printf(m, "%s %lu\n", memcg1_stat_names[i], > > + seq_buf_printf(s, "%s %lu\n", memcg1_stat_names[i], > > nr * memcg_page_state_unit(memcg1_stats[i])); > > } > > > > for (i = 0; i < ARRAY_SIZE(memcg1_events); i++) > > - seq_printf(m, "%s %lu\n", vm_event_name(memcg1_events[i]), > > - memcg_events_local(memcg, memcg1_events[i])); > > + seq_buf_printf(s, "%s %lu\n", vm_event_name(memcg1_events[i]), > > + memcg_events_local(memcg, memcg1_events[i])); > > > > for (i = 0; i < NR_LRU_LISTS; i++) > > - seq_printf(m, "%s %lu\n", lru_list_name(i), > > - memcg_page_state_local(memcg, NR_LRU_BASE + i) * > > - PAGE_SIZE); > > + seq_buf_printf(s, "%s %lu\n", lru_list_name(i), > > + memcg_page_state_local(memcg, NR_LRU_BASE + i) * > > + PAGE_SIZE); > > > > /* Hierarchical information */ > > memory = memsw = PAGE_COUNTER_MAX; > > @@ -4114,11 +4124,11 @@ static int memcg_stat_show(struct seq_file *m, void *v) > > memory = min(memory, READ_ONCE(mi->memory.max)); > > memsw = min(memsw, READ_ONCE(mi->memsw.max)); > > } > > - seq_printf(m, "hierarchical_memory_limit %llu\n", > > - (u64)memory * PAGE_SIZE); > > + seq_buf_printf(s, "hierarchical_memory_limit %llu\n", > > + (u64)memory * PAGE_SIZE); > > if (do_memsw_account()) > > - seq_printf(m, "hierarchical_memsw_limit %llu\n", > > - (u64)memsw * PAGE_SIZE); > > + seq_buf_printf(s, "hierarchical_memsw_limit %llu\n", > > + (u64)memsw * PAGE_SIZE); > > > > for (i = 0; i < ARRAY_SIZE(memcg1_stats); i++) { > > unsigned long nr; > > @@ -4126,19 +4136,19 @@ static int memcg_stat_show(struct seq_file *m, void *v) > > if (memcg1_stats[i] == MEMCG_SWAP && !do_memsw_account()) > > continue; > > nr = memcg_page_state(memcg, memcg1_stats[i]); > > - seq_printf(m, "total_%s %llu\n", memcg1_stat_names[i], > > + seq_buf_printf(s, "total_%s %llu\n", memcg1_stat_names[i], > > (u64)nr * memcg_page_state_unit(memcg1_stats[i])); > > } > > > > for (i = 0; i < ARRAY_SIZE(memcg1_events); i++) > > - seq_printf(m, "total_%s %llu\n", > > - vm_event_name(memcg1_events[i]), > > - (u64)memcg_events(memcg, memcg1_events[i])); > > + seq_buf_printf(s, "total_%s %llu\n", > > + vm_event_name(memcg1_events[i]), > > + (u64)memcg_events(memcg, memcg1_events[i])); > > > > for (i = 0; i < NR_LRU_LISTS; i++) > > - seq_printf(m, "total_%s %llu\n", lru_list_name(i), > > - (u64)memcg_page_state(memcg, NR_LRU_BASE + i) * > > - PAGE_SIZE); > > + seq_buf_printf(s, "total_%s %llu\n", lru_list_name(i), > > + (u64)memcg_page_state(memcg, NR_LRU_BASE + i) * > > + PAGE_SIZE); > > > > #ifdef CONFIG_DEBUG_VM > > { > > @@ -4153,12 +4163,10 @@ static int memcg_stat_show(struct seq_file *m, void *v) > > anon_cost += mz->lruvec.anon_cost; > > file_cost += mz->lruvec.file_cost; > > } > > - seq_printf(m, "anon_cost %lu\n", anon_cost); > > - seq_printf(m, "file_cost %lu\n", file_cost); > > + seq_buf_printf(s, "anon_cost %lu\n", anon_cost); > > + seq_buf_printf(s, "file_cost %lu\n", file_cost); > > } > > #endif > > - > > - return 0; > > } > > > > static u64 mem_cgroup_swappiness_read(struct cgroup_subsys_state *css, > > @@ -4998,6 +5006,8 @@ static int mem_cgroup_slab_show(struct seq_file *m, void *p) > > } > > #endif > > > > +static int memory_stat_show(struct seq_file *m, void *v); > > + > > static struct cftype mem_cgroup_legacy_files[] = { > > { > > .name = "usage_in_bytes", > > @@ -5030,7 +5040,7 @@ static struct cftype mem_cgroup_legacy_files[] = { > > }, > > { > > .name = "stat", > > - .seq_show = memcg_stat_show, > > + .seq_show = memory_stat_show, > > }, > > { > > .name = "force_empty", > > -- > > 2.40.1.495.gc816e09b53d-goog > > -- > Michal Hocko > SUSE Labs