From: Kaiyang Zhao <kaiyang2@xxxxxxxxxx> Add a memory.low for the top-tier node (locallow) and track its usage. locallow is set by scaling low by the ratio of node 0 capacity to the combined capacity of nodes 0 and 1. Signed-off-by: Kaiyang Zhao <kaiyang2@xxxxxxxxxx> --- include/linux/page_counter.h | 16 ++++++++--- mm/hugetlb_cgroup.c | 4 +-- mm/memcontrol.c | 42 ++++++++++++++++++++++------- mm/page_counter.c | 52 ++++++++++++++++++++++++++++-------- 4 files changed, 88 insertions(+), 26 deletions(-) diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h index 79dbd8bc35a7..aa56c93415ef 100644 --- a/include/linux/page_counter.h +++ b/include/linux/page_counter.h @@ -13,6 +13,7 @@ struct page_counter { * memcg->memory.usage is a hot member of struct mem_cgroup. */ atomic_long_t usage; + struct mem_cgroup *memcg; /* memcg that owns this counter */ CACHELINE_PADDING(_pad1_); /* effective memory.min and memory.min usage tracking */ @@ -25,6 +26,10 @@ struct page_counter { atomic_long_t low_usage; atomic_long_t children_low_usage; + unsigned long elocallow; + atomic_long_t locallow_usage; + atomic_long_t children_locallow_usage; + unsigned long watermark; /* Latest cg2 reset watermark */ unsigned long local_watermark; @@ -36,6 +41,7 @@ struct page_counter { bool protection_support; unsigned long min; unsigned long low; + unsigned long locallow; unsigned long high; unsigned long max; struct page_counter *parent; @@ -52,12 +58,13 @@ struct page_counter { */ static inline void page_counter_init(struct page_counter *counter, struct page_counter *parent, - bool protection_support) + bool protection_support, struct mem_cgroup *memcg) { counter->usage = (atomic_long_t)ATOMIC_LONG_INIT(0); counter->max = PAGE_COUNTER_MAX; counter->parent = parent; counter->protection_support = protection_support; + counter->memcg = memcg; } static inline unsigned long page_counter_read(struct page_counter *counter) @@ -72,7 +79,8 @@ bool page_counter_try_charge(struct page_counter
*counter, struct page_counter **fail); void page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages); void page_counter_set_min(struct page_counter *counter, unsigned long nr_pages); -void page_counter_set_low(struct page_counter *counter, unsigned long nr_pages); +void page_counter_set_low(struct page_counter *counter, unsigned long nr_pages, + unsigned long nr_pages_local); static inline void page_counter_set_high(struct page_counter *counter, unsigned long nr_pages) @@ -99,11 +107,11 @@ static inline void page_counter_reset_watermark(struct page_counter *counter) #ifdef CONFIG_MEMCG void page_counter_calculate_protection(struct page_counter *root, struct page_counter *counter, - bool recursive_protection); + bool recursive_protection, int is_local); #else static inline void page_counter_calculate_protection(struct page_counter *root, struct page_counter *counter, - bool recursive_protection) {} + bool recursive_protection, int is_local) {} #endif #endif /* _LINUX_PAGE_COUNTER_H */ diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index d8d0e665caed..0e07a7a1d5b8 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c @@ -114,10 +114,10 @@ static void hugetlb_cgroup_init(struct hugetlb_cgroup *h_cgroup, } page_counter_init(hugetlb_cgroup_counter_from_cgroup(h_cgroup, idx), - fault_parent, false); + fault_parent, false, NULL); page_counter_init( hugetlb_cgroup_counter_from_cgroup_rsvd(h_cgroup, idx), - rsvd_parent, false); + rsvd_parent, false, NULL); limit = round_down(PAGE_COUNTER_MAX, pages_per_huge_page(&hstates[idx])); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 20b715441332..d7c5fff12105 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1497,6 +1497,9 @@ static void memcg_stat_format(struct mem_cgroup *memcg, struct seq_buf *s) vm_event_name(memcg_vm_event_stat[i]), memcg_events(memcg, memcg_vm_event_stat[i])); } + + seq_buf_printf(s, "local_usage %lu\n", + get_cgroup_local_usage(memcg, true)); } static void 
memory_stat_format(struct mem_cgroup *memcg, struct seq_buf *s) @@ -3597,8 +3600,8 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) if (parent) { WRITE_ONCE(memcg->swappiness, mem_cgroup_swappiness(parent)); - page_counter_init(&memcg->memory, &parent->memory, true); - page_counter_init(&memcg->swap, &parent->swap, false); + page_counter_init(&memcg->memory, &parent->memory, true, memcg); + page_counter_init(&memcg->swap, &parent->swap, false, NULL); #ifdef CONFIG_MEMCG_V1 WRITE_ONCE(memcg->oom_kill_disable, READ_ONCE(parent->oom_kill_disable)); page_counter_init(&memcg->kmem, &parent->kmem, false); @@ -3607,8 +3610,8 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) } else { init_memcg_stats(); init_memcg_events(); - page_counter_init(&memcg->memory, NULL, true); - page_counter_init(&memcg->swap, NULL, false); + page_counter_init(&memcg->memory, NULL, true, memcg); + page_counter_init(&memcg->swap, NULL, false, NULL); #ifdef CONFIG_MEMCG_V1 page_counter_init(&memcg->kmem, NULL, false); page_counter_init(&memcg->tcpmem, NULL, false); @@ -3677,7 +3680,7 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css) memcg1_css_offline(memcg); page_counter_set_min(&memcg->memory, 0); - page_counter_set_low(&memcg->memory, 0); + page_counter_set_low(&memcg->memory, 0, 0); zswap_memcg_offline_cleanup(memcg); @@ -3748,7 +3751,7 @@ static void mem_cgroup_css_reset(struct cgroup_subsys_state *css) page_counter_set_max(&memcg->tcpmem, PAGE_COUNTER_MAX); #endif page_counter_set_min(&memcg->memory, 0); - page_counter_set_low(&memcg->memory, 0); + page_counter_set_low(&memcg->memory, 0, 0); page_counter_set_high(&memcg->memory, PAGE_COUNTER_MAX); memcg1_soft_limit_reset(memcg); page_counter_set_high(&memcg->swap, PAGE_COUNTER_MAX); @@ -4051,6 +4054,12 @@ static ssize_t memory_min_write(struct kernfs_open_file *of, return nbytes; } +static int memory_locallow_show(struct seq_file *m, void *v) +{ + return seq_puts_memcg_tunable(m, + 
READ_ONCE(mem_cgroup_from_seq(m)->memory.locallow)); +} + static int memory_low_show(struct seq_file *m, void *v) { return seq_puts_memcg_tunable(m, @@ -4061,7 +4070,8 @@ static ssize_t memory_low_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); - unsigned long low; + struct sysinfo si; + unsigned long low, locallow, local_capacity, total_capacity; int err; buf = strstrip(buf); @@ -4069,7 +4079,15 @@ static ssize_t memory_low_write(struct kernfs_open_file *of, if (err) return err; - page_counter_set_low(&memcg->memory, low); + /* Hardcoded 0 for local node and 1 for remote. */ + si_meminfo_node(&si, 0); + local_capacity = si.totalram; /* In pages. */ + total_capacity = local_capacity; + si_meminfo_node(&si, 1); + total_capacity += si.totalram; + locallow = low * local_capacity / total_capacity; + + page_counter_set_low(&memcg->memory, low, locallow); return nbytes; } @@ -4394,6 +4412,11 @@ static struct cftype memory_files[] = { .seq_show = memory_low_show, .write = memory_low_write, }, + { + .name = "locallow", + .flags = CFTYPE_NOT_ON_ROOT, + .seq_show = memory_locallow_show, + }, { .name = "high", .flags = CFTYPE_NOT_ON_ROOT, @@ -4483,7 +4506,8 @@ void mem_cgroup_calculate_protection(struct mem_cgroup *root, if (!root) root = root_mem_cgroup; - page_counter_calculate_protection(&root->memory, &memcg->memory, recursive_protection); + page_counter_calculate_protection(&root->memory, &memcg->memory, + recursive_protection, false); } static int charge_memcg(struct folio *folio, struct mem_cgroup *memcg, diff --git a/mm/page_counter.c b/mm/page_counter.c index b249d15af9dd..97205aafab46 100644 --- a/mm/page_counter.c +++ b/mm/page_counter.c @@ -18,8 +18,10 @@ static bool track_protection(struct page_counter *c) return c->protection_support; } +extern unsigned long get_cgroup_local_usage(struct mem_cgroup *memcg, bool flush); + static void propagate_protected_usage(struct 
page_counter *c, - unsigned long usage) + unsigned long usage, unsigned long local_usage) { unsigned long protected, old_protected; long delta; @@ -44,6 +46,15 @@ static void propagate_protected_usage(struct page_counter *c, if (delta) atomic_long_add(delta, &c->parent->children_low_usage); } + + protected = min(local_usage, READ_ONCE(c->locallow)); + old_protected = atomic_long_read(&c->locallow_usage); + if (protected != old_protected) { + old_protected = atomic_long_xchg(&c->locallow_usage, protected); + delta = protected - old_protected; + if (delta) + atomic_long_add(delta, &c->parent->children_locallow_usage); + } } /** @@ -63,7 +74,8 @@ void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages) atomic_long_set(&counter->usage, new); } if (track_protection(counter)) - propagate_protected_usage(counter, new); + propagate_protected_usage(counter, new, + get_cgroup_local_usage(counter->memcg, false)); } /** @@ -83,7 +95,8 @@ void page_counter_charge(struct page_counter *counter, unsigned long nr_pages) new = atomic_long_add_return(nr_pages, &c->usage); if (protection) - propagate_protected_usage(c, new); + propagate_protected_usage(c, new, + get_cgroup_local_usage(counter->memcg, false)); /* * This is indeed racy, but we can live with some * inaccuracy in the watermark. 
@@ -151,7 +164,8 @@ bool page_counter_try_charge(struct page_counter *counter, goto failed; } if (protection) - propagate_protected_usage(c, new); + propagate_protected_usage(c, new, + get_cgroup_local_usage(counter->memcg, false)); /* see comment on page_counter_charge */ if (new > READ_ONCE(c->local_watermark)) { @@ -238,7 +252,8 @@ void page_counter_set_min(struct page_counter *counter, unsigned long nr_pages) WRITE_ONCE(counter->min, nr_pages); for (c = counter; c; c = c->parent) - propagate_protected_usage(c, atomic_long_read(&c->usage)); + propagate_protected_usage(c, atomic_long_read(&c->usage), + get_cgroup_local_usage(counter->memcg, false)); } /** @@ -248,14 +263,17 @@ void page_counter_set_min(struct page_counter *counter, unsigned long nr_pages) * * The caller must serialize invocations on the same counter. */ -void page_counter_set_low(struct page_counter *counter, unsigned long nr_pages) +void page_counter_set_low(struct page_counter *counter, unsigned long nr_pages, + unsigned long nr_pages_local) { struct page_counter *c; WRITE_ONCE(counter->low, nr_pages); + WRITE_ONCE(counter->locallow, nr_pages_local); for (c = counter; c; c = c->parent) - propagate_protected_usage(c, atomic_long_read(&c->usage)); + propagate_protected_usage(c, atomic_long_read(&c->usage), + get_cgroup_local_usage(counter->memcg, false)); } /** @@ -421,9 +439,9 @@ static unsigned long effective_protection(unsigned long usage, */ void page_counter_calculate_protection(struct page_counter *root, struct page_counter *counter, - bool recursive_protection) + bool recursive_protection, int is_local) { - unsigned long usage, parent_usage; + unsigned long usage, parent_usage, local_usage, parent_local_usage; struct page_counter *parent = counter->parent; /* @@ -437,16 +455,19 @@ void page_counter_calculate_protection(struct page_counter *root, return; usage = page_counter_read(counter); - if (!usage) + local_usage = get_cgroup_local_usage(counter->memcg, true); + if (!usage || 
!local_usage) return; if (parent == root) { counter->emin = READ_ONCE(counter->min); counter->elow = READ_ONCE(counter->low); + counter->elocallow = READ_ONCE(counter->locallow); return; } parent_usage = page_counter_read(parent); + parent_local_usage = get_cgroup_local_usage(parent->memcg, true); WRITE_ONCE(counter->emin, effective_protection(usage, parent_usage, READ_ONCE(counter->min), @@ -454,7 +475,16 @@ void page_counter_calculate_protection(struct page_counter *root, atomic_long_read(&parent->children_min_usage), recursive_protection)); - WRITE_ONCE(counter->elow, effective_protection(usage, parent_usage, + if (is_local) + WRITE_ONCE(counter->elocallow, + effective_protection(local_usage, parent_local_usage, + READ_ONCE(counter->locallow), + READ_ONCE(parent->elocallow), + atomic_long_read(&parent->children_locallow_usage), + recursive_protection)); + else + WRITE_ONCE(counter->elow, + effective_protection(usage, parent_usage, READ_ONCE(counter->low), READ_ONCE(parent->elow), atomic_long_read(&parent->children_low_usage), -- 2.43.0