On Thu 21-07-22 08:49:57, Tetsuo Handa wrote: > syzbot is reporting fs_reclaim allocation with oom_lock held [1]. We > must make sure that such allocation won't hit __alloc_pages_may_oom() > path which will retry forever if oom_lock is already held. > > I choose GFP_ATOMIC than GFP_NOWAIT, for since global OOM situation will > likely be avoided by killing some process in memcg, and memory will be > released after printk(), trying a little hard will be acceptable. Nope, this is not a proper fix. You are making memory.stat more likely to fail. An unconditional GFP_KERNEL allocation is certainly not good but is there any reason to not use GFP_NOIO instead? Or even better. In an ideal world we won't allocate from here at all. Can we pre-allocate that single page and re-use it for the oom report? --- diff --git a/mm/memcontrol.c b/mm/memcontrol.c index abec50f31fe6..13483cb278bb 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1460,14 +1460,12 @@ static inline unsigned long memcg_page_state_output(struct mem_cgroup *memcg, return memcg_page_state(memcg, item) * memcg_page_state_unit(item); } -static char *memory_stat_format(struct mem_cgroup *memcg) +void memory_stat_format(struct mem_cgroup *memcg, char *buf) { struct seq_buf s; int i; - seq_buf_init(&s, kmalloc(PAGE_SIZE, GFP_KERNEL), PAGE_SIZE); - if (!s.buffer) - return NULL; + seq_buf_init(&s, buf, PAGE_SIZE); /* * Provide statistics on the state of the memory subsystem as @@ -1533,8 +1531,6 @@ static char *memory_stat_format(struct mem_cgroup *memcg) /* The above should easily fit into one page */ WARN_ON_ONCE(seq_buf_has_overflowed(&s)); - - return s.buffer; } #define K(x) ((x) << (PAGE_SHIFT-10)) @@ -1563,6 +1559,12 @@ void mem_cgroup_print_oom_context(struct mem_cgroup *memcg, struct task_struct * rcu_read_unlock(); } +/* + * preallocated buffer to collect memory stats for the oom situation. 
+ * Usage protected by oom_lock + */ +char oombuf[PAGE_SIZE]; + /** * mem_cgroup_print_oom_meminfo: Print OOM memory information relevant to * memory controller. @@ -1570,7 +1572,7 @@ void mem_cgroup_print_oom_context(struct mem_cgroup *memcg, struct task_struct * */ void mem_cgroup_print_oom_meminfo(struct mem_cgroup *memcg) { - char *buf; + lockdep_assert_held(&oom_lock); pr_info("memory: usage %llukB, limit %llukB, failcnt %lu\n", K((u64)page_counter_read(&memcg->memory)), @@ -1591,11 +1593,8 @@ void mem_cgroup_print_oom_meminfo(struct mem_cgroup *memcg) pr_info("Memory cgroup stats for "); pr_cont_cgroup_path(memcg->css.cgroup); pr_cont(":"); - buf = memory_stat_format(memcg); - if (!buf) - return; - pr_info("%s", buf); - kfree(buf); + memory_stat_format(memcg, oombuf); + pr_info("%s", oombuf); } /* @@ -6335,11 +6334,11 @@ static int memory_events_local_show(struct seq_file *m, void *v) static int memory_stat_show(struct seq_file *m, void *v) { struct mem_cgroup *memcg = mem_cgroup_from_seq(m); - char *buf; + char *buf = kmalloc(PAGE_SIZE, GFP_KERNEL); - buf = memory_stat_format(memcg); if (!buf) return -ENOMEM; + memory_stat_format(memcg, buf); seq_puts(m, buf); kfree(buf); return 0; -- Michal Hocko SUSE Labs