On systems with fast CPUs and slow disks, repeatedly yielding the CPU with PR_MEMACT_SLOWDOWN may not slow down memory allocation enough for memory reclaim to catch up. Moreover, when a large memory block is mmap'ed and its pages are faulted in one by one, the syscall delays are not activated during that process. To be safe, an additional variable delay of 20-5000 us is added in __mem_cgroup_over_high_action() if the excess memory used is more than 1/256 of the memory limit. Signed-off-by: Waiman Long <longman@xxxxxxxxxx> --- mm/memcontrol.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 6488f8a10d66..bddf3e659469 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2643,11 +2643,10 @@ get_rss_counter(struct mm_struct *mm, int mm_bit, u16 flags, int rss_bit) static bool __mem_cgroup_over_high_action(struct mem_cgroup *memcg, u8 action, u16 flags) { - unsigned long mem = 0; + unsigned long mem = 0, limit = 0, excess = 0; bool ret = false; struct mm_struct *mm = get_task_mm(current); u8 signal = READ_ONCE(current->memcg_over_high_signal); - u32 limit; if (!mm) return true; /* No more check is needed */ @@ -2657,9 +2656,10 @@ static bool __mem_cgroup_over_high_action(struct mem_cgroup *memcg, u8 action, if (memcg) { mem = page_counter_read(&memcg->memory); - limit = READ_ONCE(current->memcg_over_high_climit); - if (mem <= memcg->memory.high + limit) + limit = READ_ONCE(current->memcg_over_high_climit) + memcg->memory.high; + if (mem <= limit) goto out; + excess = mem - limit; } /* @@ -2676,6 +2676,7 @@ static bool __mem_cgroup_over_high_action(struct mem_cgroup *memcg, u8 action, limit = READ_ONCE(current->memcg_over_high_plimit); if (mem <= limit) goto out; + excess = mem - limit; } ret = true; @@ -2685,10 +2686,19 @@ static bool __mem_cgroup_over_high_action(struct mem_cgroup *memcg, u8 action, break; case PR_MEMACT_SLOWDOWN: /* - * Slow down by yielding the cpu & adding delay to - * 
memory allocation syscalls. + * Slow down by yielding the cpu & adding delay to memory + * allocation syscalls. + * + * An additional 20-5000 us of delay is added in case the + * excess memory is more than 1/256 of the limit. */ WRITE_ONCE(current->memcg_over_limit, true); + limit >>= 8; + if (limit && (excess > limit)) { + int delay = min(5000UL, excess/limit * 20UL); + + udelay(delay); + } set_tsk_need_resched(current); set_preempt_need_resched(); break; -- 2.18.1