From: Zi Yan <ziy@xxxxxxxxxx>

This patch prepares for the following patches, which enable memcg-based
NUMA node page migration. We are going to limit memory usage on each
NUMA node on a per-memcg basis.

Each mem_cgroup_per_node gains a max_nr_base_pages limit (defaulting to
PAGE_COUNTER_MAX, i.e. unlimited), along with helpers to query a memcg's
per-node size and limit, and per-node interface files size_at_node:<nid>
(usage, read-only) and max_at_node:<nid> (limit, read-write).

Signed-off-by: Zi Yan <ziy@xxxxxxxxxx>
---
 include/linux/cgroup-defs.h |  1 +
 include/linux/memcontrol.h  | 67 +++++++++++++++++++++++++++++++++++++
 mm/memcontrol.c             | 80 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 148 insertions(+)

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 1c70803..7e87f5e 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -531,6 +531,7 @@ struct cftype {
 	struct cgroup_subsys *ss;	/* NULL for cgroup core files */
 	struct list_head node;		/* anchored at ss->cfts */
 	struct kernfs_ops *kf_ops;
+	int numa_node_id;
 
 	int (*open)(struct kernfs_open_file *of);
 	void (*release)(struct kernfs_open_file *of);
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 1f3d880..3e40321 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -130,6 +130,7 @@ struct mem_cgroup_per_node {
 
 	atomic_long_t		lruvec_stat[NR_VM_NODE_STAT_ITEMS];
 	unsigned long		lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS];
+	unsigned long		max_nr_base_pages;
 
 	struct mem_cgroup_reclaim_iter	iter[DEF_PRIORITY + 1];
 
@@ -797,6 +798,51 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm,
 void mem_cgroup_split_huge_fixup(struct page *head);
 #endif
 
+static inline unsigned long lruvec_size_memcg_node(enum lru_list lru,
+		struct mem_cgroup *memcg, int nid)
+{
+	if (nid == MAX_NUMNODES)
+		return 0;
+
+	VM_BUG_ON(lru < 0 || lru >= NR_LRU_LISTS);
+	return mem_cgroup_node_nr_lru_pages(memcg, nid, BIT(lru));
+}
+
+static inline unsigned long active_inactive_size_memcg_node(struct mem_cgroup *memcg, int nid, bool active)
+{
+	unsigned long val = 0;
+	enum lru_list lru;
+
+	for_each_evictable_lru(lru) {
+		if ((active && is_active_lru(lru)) ||
+		    (!active && !is_active_lru(lru)))
+			val += mem_cgroup_node_nr_lru_pages(memcg, nid, BIT(lru));
+	}
+
+	return val;
+}
+
+static inline unsigned long memcg_size_node(struct mem_cgroup *memcg, int nid)
+{
+	unsigned long val = 0;
+	int i;
+
+	if (nid == MAX_NUMNODES)
+		return val;
+
+	for (i = 0; i < NR_LRU_LISTS; i++)
+		val += mem_cgroup_node_nr_lru_pages(memcg, nid, BIT(i));
+
+	return val;
+}
+
+static inline unsigned long memcg_max_size_node(struct mem_cgroup *memcg, int nid)
+{
+	if (nid == MAX_NUMNODES)
+		return 0;
+	return memcg->nodeinfo[nid]->max_nr_base_pages;
+}
+
 #else /* CONFIG_MEMCG */
 
 #define MEM_CGROUP_ID_SHIFT	0
@@ -1123,6 +1169,27 @@ static inline void count_memcg_event_mm(struct mm_struct *mm,
 					 enum vm_event_item idx)
 {
 }
+
+static inline unsigned long lruvec_size_memcg_node(enum lru_list lru,
+		struct mem_cgroup *memcg, int nid)
+{
+	return 0;
+}
+
+static inline unsigned long active_inactive_size_memcg_node(struct mem_cgroup *memcg, int nid, bool active)
+{
+	return 0;
+}
+
+static inline unsigned long memcg_size_node(struct mem_cgroup *memcg, int nid)
+{
+	return 0;
+}
+static inline unsigned long memcg_max_size_node(struct mem_cgroup *memcg, int nid)
+{
+	return 0;
+}
+
 #endif /* CONFIG_MEMCG */
 
 /* idx can be of type enum memcg_stat_item or node_stat_item */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 532e0e2..478d216 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4394,6 +4394,7 @@ static int alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
 	pn->usage_in_excess = 0;
 	pn->on_tree = false;
 	pn->memcg = memcg;
+	pn->max_nr_base_pages = PAGE_COUNTER_MAX;
 
 	memcg->nodeinfo[node] = pn;
 	return 0;
@@ -6700,4 +6701,83 @@ static int __init mem_cgroup_swap_init(void)
 }
 subsys_initcall(mem_cgroup_swap_init);
 
+static int memory_per_node_stat_show(struct seq_file *m, void *v)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
+	struct cftype *cur_file = seq_cft(m);
+	int nid = cur_file->numa_node_id;
+	unsigned long val = 0;
+	int i;
+
+	for (i = 0; i < NR_LRU_LISTS; i++)
+		val += mem_cgroup_node_nr_lru_pages(memcg, nid, BIT(i));
+
+	seq_printf(m, "%llu\n", (u64)val * PAGE_SIZE);
+
+	return 0;
+}
+
+static int memory_per_node_max_show(struct seq_file *m, void *v)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
+	struct cftype *cur_file = seq_cft(m);
+	int nid = cur_file->numa_node_id;
+	unsigned long max = READ_ONCE(memcg->nodeinfo[nid]->max_nr_base_pages);
+
+	if (max == PAGE_COUNTER_MAX)
+		seq_puts(m, "max\n");
+	else
+		seq_printf(m, "%llu\n", (u64)max * PAGE_SIZE);
+
+	return 0;
+}
+
+static ssize_t memory_per_node_max_write(struct kernfs_open_file *of,
+		char *buf, size_t nbytes, loff_t off)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+	struct cftype *cur_file = of_cft(of);
+	int nid = cur_file->numa_node_id;
+	unsigned long max;
+	int err;
+
+	buf = strstrip(buf);
+	err = page_counter_memparse(buf, "max", &max);
+	if (err)
+		return err;
+
+	xchg(&memcg->nodeinfo[nid]->max_nr_base_pages, max);
+
+	return nbytes;
+}
+
+static struct cftype memcg_per_node_stats_files[N_MEMORY];
+static struct cftype memcg_per_node_max_files[N_MEMORY];
+
+static int __init mem_cgroup_per_node_init(void)
+{
+	int nid;
+
+	for_each_node_state(nid, N_MEMORY) {
+		snprintf(memcg_per_node_stats_files[nid].name, MAX_CFTYPE_NAME,
+			 "size_at_node:%d", nid);
+		memcg_per_node_stats_files[nid].flags = CFTYPE_NOT_ON_ROOT;
+		memcg_per_node_stats_files[nid].seq_show = memory_per_node_stat_show;
+		memcg_per_node_stats_files[nid].numa_node_id = nid;
+
+		snprintf(memcg_per_node_max_files[nid].name, MAX_CFTYPE_NAME,
+			 "max_at_node:%d", nid);
+		memcg_per_node_max_files[nid].flags = CFTYPE_NOT_ON_ROOT;
+		memcg_per_node_max_files[nid].seq_show = memory_per_node_max_show;
+		memcg_per_node_max_files[nid].write = memory_per_node_max_write;
+		memcg_per_node_max_files[nid].numa_node_id = nid;
+	}
+	WARN_ON(cgroup_add_dfl_cftypes(&memory_cgrp_subsys,
+				       memcg_per_node_stats_files));
+	WARN_ON(cgroup_add_dfl_cftypes(&memory_cgrp_subsys,
+				       memcg_per_node_max_files));
+	return 0;
+}
+subsys_initcall(mem_cgroup_per_node_init);
+
 #endif /* CONFIG_MEMCG_SWAP */
-- 
2.7.4
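
A hypothetical usage sketch, not part of the patch: assuming the memory
controller is enabled on the cgroup v2 hierarchy mounted at
/sys/fs/cgroup, a non-root child cgroup named "test" (the new files are
CFTYPE_NOT_ON_ROOT), and the usual "memory." prefix that
cgroup_add_dfl_cftypes() applies to cftype names on the default
hierarchy, the per-node files would be exercised roughly like this:

	# read the cgroup's per-node LRU footprint on node 0, in bytes
	cat /sys/fs/cgroup/test/memory.size_at_node:0

	# cap the cgroup's node 0 usage at 1G; page_counter_memparse()
	# also accepts "max" to lift the cap again
	echo 1G > /sys/fs/cgroup/test/memory.max_at_node:0
	echo max > /sys/fs/cgroup/test/memory.max_at_node:0

Note that this patch only records the per-node limit; enforcement via
migration is left to the following patches in the series.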