From: Zeng Jingxiang <linuszeng@xxxxxxxxxxx> The memsw account is a very useful knob for container memory overcommitting: it is a great abstraction of the "expected total memory usage" of a container, so containers can't allocate too much memory using SWAP, while still being able to swap out. As a simple example, with memsw.limit == memory.limit, containers can't exceed their original memory limit even with SWAP enabled; they get OOM-killed just as they used to, but the host is now able to offload cold pages. A similar ability seems to be absent in cgroup v2: with memory.swap.max == 0, the host can't use SWAP to reclaim container memory at all. But with a value larger than that, containers are able to overuse memory, causing delayed OOM kills and thrashing, and the CPU/memory usage ratio can become heavily out of balance, especially with compressed SWAP backends. This patch restores the semantics of memory.swap.max to be consistent with memory.memsw.limit_in_bytes, and the semantics of memory.swap.current to be consistent with memory.memsw.usage_in_bytes, when the MEMSW_ACCOUNT_ON_DFL config option or the cgroup.memsw_account_on_dfl startup parameter is enabled. 
Signed-off-by: Zeng Jingxiang <linuszeng@xxxxxxxxxxx> --- mm/memcontrol-v1.c | 2 +- mm/memcontrol-v1.h | 4 +++- mm/memcontrol.c | 29 +++++++++++++++++++---------- 3 files changed, 23 insertions(+), 12 deletions(-) diff --git a/mm/memcontrol-v1.c b/mm/memcontrol-v1.c index c1feb3945350..3344d5e25822 100644 --- a/mm/memcontrol-v1.c +++ b/mm/memcontrol-v1.c @@ -1436,7 +1436,7 @@ void memcg1_oom_finish(struct mem_cgroup *memcg, bool locked) static DEFINE_MUTEX(memcg_max_mutex); -static int mem_cgroup_resize_max(struct mem_cgroup *memcg, +int mem_cgroup_resize_max(struct mem_cgroup *memcg, unsigned long max, bool memsw) { bool enlarge = false; diff --git a/mm/memcontrol-v1.h b/mm/memcontrol-v1.h index 6358464bb416..7f7ef9f6d03e 100644 --- a/mm/memcontrol-v1.h +++ b/mm/memcontrol-v1.h @@ -36,10 +36,12 @@ struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg); /* Cgroup v1-specific declarations */ #ifdef CONFIG_MEMCG_V1 +int mem_cgroup_resize_max(struct mem_cgroup *memcg, + unsigned long max, bool memsw); /* Whether legacy memory+swap accounting is active */ static inline bool do_memsw_account(void) { - return !cgroup_subsys_on_dfl(memory_cgrp_subsys); + return !cgroup_subsys_on_dfl(memory_cgrp_subsys) || do_memsw_account_on_dfl(); } unsigned long memcg_events_local(struct mem_cgroup *memcg, int event); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 623ebf610946..d85699fa8a90 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5205,9 +5205,12 @@ static ssize_t swap_max_write(struct kernfs_open_file *of, if (err) return err; - xchg(&memcg->swap.max, max); + if (do_memsw_account_on_dfl()) + err = mem_cgroup_resize_max(memcg, max, true); + else + xchg(&memcg->swap.max, max); - return nbytes; + return err ?: nbytes; } static int swap_events_show(struct seq_file *m, void *v) @@ -5224,24 +5227,28 @@ static int swap_events_show(struct seq_file *m, void *v) return 0; } -static struct cftype swap_files[] = { +static struct cftype swap_files_v1[] = { { .name 
= "swap.current", .flags = CFTYPE_NOT_ON_ROOT, .read_u64 = swap_current_read, }, - { - .name = "swap.high", - .flags = CFTYPE_NOT_ON_ROOT, - .seq_show = swap_high_show, - .write = swap_high_write, - }, { .name = "swap.max", .flags = CFTYPE_NOT_ON_ROOT, .seq_show = swap_max_show, .write = swap_max_write, }, + { } /* terminate */ +}; + +static struct cftype swap_files[] = { + { + .name = "swap.high", + .flags = CFTYPE_NOT_ON_ROOT, + .seq_show = swap_high_show, + .write = swap_high_write, + }, { .name = "swap.max.effective", .flags = CFTYPE_NOT_ON_ROOT, @@ -5473,7 +5480,9 @@ static int __init mem_cgroup_swap_init(void) if (mem_cgroup_disabled()) return 0; - WARN_ON(cgroup_add_dfl_cftypes(&memory_cgrp_subsys, swap_files)); + WARN_ON(cgroup_add_dfl_cftypes(&memory_cgrp_subsys, swap_files_v1)); + if (!do_memsw_account_on_dfl()) + WARN_ON(cgroup_add_dfl_cftypes(&memory_cgrp_subsys, swap_files)); #ifdef CONFIG_MEMCG_V1 WARN_ON(cgroup_add_legacy_cftypes(&memory_cgrp_subsys, memsw_files)); #endif -- 2.41.1