Just to be explicit about what I had in mind. This hasn't even been compile-tested, but it should at least give an idea of where I am trying to go. --- diff --git a/mm/memcontrol.c b/mm/memcontrol.c index d5f3a62887cf..91fa05372114 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1528,26 +1528,36 @@ static void memcg_oom_recover(struct mem_cgroup *memcg) static void mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int order) { - if (!current->memcg_may_oom) - return; /* * We are in the middle of the charge context here, so we * don't want to block when potentially sitting on a callstack * that holds all kinds of filesystem and mm locks. * - * Also, the caller may handle a failed allocation gracefully - * (like optional page cache readahead) and so an OOM killer - * invocation might not even be necessary. + * cgroup v1 allows sync user space handling so we cannot afford + * to get stuck here for that configuration. That's why we don't do + * anything here except remember the OOM context and then deal with + * it at the end of the page fault when the stack is unwound, the + * locks are released, and when we know whether the fault was overall + * successful. * - * That's why we don't do anything here except remember the - * OOM context and then deal with it at the end of the page - * fault when the stack is unwound, the locks are released, - * and when we know whether the fault was overall successful. + * On the other hand, the in-kernel OOM killer allows for an async victim + * memory reclaim (oom_reaper) and that means that we are not solely + * relying on the oom victim to make forward progress so we can stay + * in the try_charge context and keep retrying as long as there + * are oom victims to select. 
*/ - css_get(&memcg->css); - current->memcg_in_oom = memcg; - current->memcg_oom_gfp_mask = mask; - current->memcg_oom_order = order; + if (memcg->oom_kill_disable) { + if (!current->memcg_may_oom) + return false; + css_get(&memcg->css); + current->memcg_in_oom = memcg; + current->memcg_oom_gfp_mask = mask; + current->memcg_oom_order = order; + + return false; + } + + return mem_cgroup_out_of_memory(memcg, mask, order); } /** @@ -2007,8 +2017,11 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask, mem_cgroup_event(mem_over_limit, MEMCG_OOM); - mem_cgroup_oom(mem_over_limit, gfp_mask, - get_order(nr_pages * PAGE_SIZE)); + if (mem_cgroup_oom(mem_over_limit, gfp_mask, + get_order(nr_pages * PAGE_SIZE))) { + nr_retries = MEM_CGROUP_RECLAIM_RETRIES; + goto retry; + } nomem: if (!(gfp_mask & __GFP_NOFAIL)) return -ENOMEM; -- Michal Hocko SUSE Labs