On Mon 24-06-24 17:59:02, Roman Gushchin wrote: > Move legacy cgroup v1 memory controller interfaces and corresponding > code into memcontrol-v1.c. > > Signed-off-by: Roman Gushchin <roman.gushchin@xxxxxxxxx> Acked-by: Michal Hocko <mhocko@xxxxxxxx> > --- > mm/memcontrol-v1.c | 739 ++++++++++++++++++++++++++++++++++++++++++++- > mm/memcontrol-v1.h | 29 +- > mm/memcontrol.c | 721 +------------------------------------------ > 3 files changed, 767 insertions(+), 722 deletions(-) > > diff --git a/mm/memcontrol-v1.c b/mm/memcontrol-v1.c > index 1d5608ee1606..1b7337d0170d 100644 > --- a/mm/memcontrol-v1.c > +++ b/mm/memcontrol-v1.c > @@ -10,6 +10,7 @@ > #include <linux/poll.h> > #include <linux/sort.h> > #include <linux/file.h> > +#include <linux/seq_buf.h> > > #include "internal.h" > #include "swap.h" > @@ -110,6 +111,18 @@ struct mem_cgroup_event { > struct work_struct remove; > }; > > +#define MEMFILE_PRIVATE(x, val) ((x) << 16 | (val)) > +#define MEMFILE_TYPE(val) ((val) >> 16 & 0xffff) > +#define MEMFILE_ATTR(val) ((val) & 0xffff) > + > +enum { > + RES_USAGE, > + RES_LIMIT, > + RES_MAX_USAGE, > + RES_FAILCNT, > + RES_SOFT_LIMIT, > +}; > + > #ifdef CONFIG_LOCKDEP > static struct lockdep_map memcg_oom_lock_dep_map = { > .name = "memcg_oom_lock", > @@ -577,14 +590,14 @@ static inline int mem_cgroup_move_swap_account(swp_entry_t entry, > } > #endif > > -u64 mem_cgroup_move_charge_read(struct cgroup_subsys_state *css, > +static u64 mem_cgroup_move_charge_read(struct cgroup_subsys_state *css, > struct cftype *cft) > { > return mem_cgroup_from_css(css)->move_charge_at_immigrate; > } > > #ifdef CONFIG_MMU > -int mem_cgroup_move_charge_write(struct cgroup_subsys_state *css, > +static int mem_cgroup_move_charge_write(struct cgroup_subsys_state *css, > struct cftype *cft, u64 val) > { > struct mem_cgroup *memcg = mem_cgroup_from_css(css); > @@ -606,7 +619,7 @@ int mem_cgroup_move_charge_write(struct cgroup_subsys_state *css, > return 0; > } > #else > -int mem_cgroup_move_charge_write(struct cgroup_subsys_state *css, > +static int mem_cgroup_move_charge_write(struct cgroup_subsys_state *css, > struct cftype *cft, u64 val) > { > return -ENOSYS; > @@ -1803,8 +1816,8 @@ static void memcg_event_ptable_queue_proc(struct file *file, > * Input must be in format '<event_fd> <control_fd> <args>'. > * Interpretation of args is defined by control file implementation. > */ > -ssize_t memcg_write_event_control(struct kernfs_open_file *of, > - char *buf, size_t nbytes, loff_t off) > +static ssize_t memcg_write_event_control(struct kernfs_open_file *of, > + char *buf, size_t nbytes, loff_t off) > { > struct cgroup_subsys_state *css = of_css(of); > struct mem_cgroup *memcg = mem_cgroup_from_css(css); > @@ -2184,6 +2197,722 @@ void memcg1_oom_finish(struct mem_cgroup *memcg, bool locked) > mem_cgroup_oom_unlock(memcg); > } > > +static DEFINE_MUTEX(memcg_max_mutex); > + > +static int mem_cgroup_resize_max(struct mem_cgroup *memcg, > + unsigned long max, bool memsw) > +{ > + bool enlarge = false; > + bool drained = false; > + int ret; > + bool limits_invariant; > + struct page_counter *counter = memsw ? &memcg->memsw : &memcg->memory; > + > + do { > + if (signal_pending(current)) { > + ret = -EINTR; > + break; > + } > + > + mutex_lock(&memcg_max_mutex); > + /* > + * Make sure that the new limit (memsw or memory limit) doesn't > + * break our basic invariant rule memory.max <= memsw.max. > + */ > + limits_invariant = memsw ? max >= READ_ONCE(memcg->memory.max) : > + max <= memcg->memsw.max; > + if (!limits_invariant) { > + mutex_unlock(&memcg_max_mutex); > + ret = -EINVAL; > + break; > + } > + if (max > counter->max) > + enlarge = true; > + ret = page_counter_set_max(counter, max); > + mutex_unlock(&memcg_max_mutex); > + > + if (!ret) > + break; > + > + if (!drained) { > + drain_all_stock(memcg); > + drained = true; > + continue; > + } > + > + if (!try_to_free_mem_cgroup_pages(memcg, 1, GFP_KERNEL, > + memsw ? 0 : MEMCG_RECLAIM_MAY_SWAP, NULL)) { > + ret = -EBUSY; > + break; > + } > + } while (true); > + > + if (!ret && enlarge) > + memcg1_oom_recover(memcg); > + > + return ret; > +} > + > +/* > + * Reclaims as many pages from the given memcg as possible. > + * > + * Caller is responsible for holding css reference for memcg. > + */ > +static int mem_cgroup_force_empty(struct mem_cgroup *memcg) > +{ > + int nr_retries = MAX_RECLAIM_RETRIES; > + > + /* we call try-to-free pages for make this cgroup empty */ > + lru_add_drain_all(); > + > + drain_all_stock(memcg); > + > + /* try to free all pages in this cgroup */ > + while (nr_retries && page_counter_read(&memcg->memory)) { > + if (signal_pending(current)) > + return -EINTR; > + > + if (!try_to_free_mem_cgroup_pages(memcg, 1, GFP_KERNEL, > + MEMCG_RECLAIM_MAY_SWAP, NULL)) > + nr_retries--; > + } > + > + return 0; > +} > + > +static ssize_t mem_cgroup_force_empty_write(struct kernfs_open_file *of, > + char *buf, size_t nbytes, > + loff_t off) > +{ > + struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); > + > + if (mem_cgroup_is_root(memcg)) > + return -EINVAL; > + return mem_cgroup_force_empty(memcg) ?: nbytes; > +} > + > +static u64 mem_cgroup_hierarchy_read(struct cgroup_subsys_state *css, > + struct cftype *cft) > +{ > + return 1; > +} > + > +static int mem_cgroup_hierarchy_write(struct cgroup_subsys_state *css, > + struct cftype *cft, u64 val) > +{ > + if (val == 1) > + return 0; > + > + pr_warn_once("Non-hierarchical mode is deprecated. " > + "Please report your usecase to linux-mm@xxxxxxxxx if you " > + "depend on this functionality.\n"); > + > + return -EINVAL; > +} > + > +static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css, > + struct cftype *cft) > +{ > + struct mem_cgroup *memcg = mem_cgroup_from_css(css); > + struct page_counter *counter; > + > + switch (MEMFILE_TYPE(cft->private)) { > + case _MEM: > + counter = &memcg->memory; > + break; > + case _MEMSWAP: > + counter = &memcg->memsw; > + break; > + case _KMEM: > + counter = &memcg->kmem; > + break; > + case _TCP: > + counter = &memcg->tcpmem; > + break; > + default: > + BUG(); > + } > + > + switch (MEMFILE_ATTR(cft->private)) { > + case RES_USAGE: > + if (counter == &memcg->memory) > + return (u64)mem_cgroup_usage(memcg, false) * PAGE_SIZE; > + if (counter == &memcg->memsw) > + return (u64)mem_cgroup_usage(memcg, true) * PAGE_SIZE; > + return (u64)page_counter_read(counter) * PAGE_SIZE; > + case RES_LIMIT: > + return (u64)counter->max * PAGE_SIZE; > + case RES_MAX_USAGE: > + return (u64)counter->watermark * PAGE_SIZE; > + case RES_FAILCNT: > + return counter->failcnt; > + case RES_SOFT_LIMIT: > + return (u64)READ_ONCE(memcg->soft_limit) * PAGE_SIZE; > + default: > + BUG(); > + } > +} > + > +/* > + * This function doesn't do anything useful. Its only job is to provide a read > + * handler for a file so that cgroup_file_mode() will add read permissions. > + */ > +static int mem_cgroup_dummy_seq_show(__always_unused struct seq_file *m, > + __always_unused void *v) > +{ > + return -EINVAL; > +} > + > +static int memcg_update_tcp_max(struct mem_cgroup *memcg, unsigned long max) > +{ > + int ret; > + > + mutex_lock(&memcg_max_mutex); > + > + ret = page_counter_set_max(&memcg->tcpmem, max); > + if (ret) > + goto out; > + > + if (!memcg->tcpmem_active) { > + /* > + * The active flag needs to be written after the static_key > + * update. This is what guarantees that the socket activation > + * function is the last one to run. See mem_cgroup_sk_alloc() > + * for details, and note that we don't mark any socket as > + * belonging to this memcg until that flag is up. > + * > + * We need to do this, because static_keys will span multiple > + * sites, but we can't control their order. If we mark a socket > + * as accounted, but the accounting functions are not patched in > + * yet, we'll lose accounting. > + * > + * We never race with the readers in mem_cgroup_sk_alloc(), > + * because when this value change, the code to process it is not > + * patched in yet. > + */ > + static_branch_inc(&memcg_sockets_enabled_key); > + memcg->tcpmem_active = true; > + } > +out: > + mutex_unlock(&memcg_max_mutex); > + return ret; > +} > + > +/* > + * The user of this function is... > + * RES_LIMIT. > + */ > +static ssize_t mem_cgroup_write(struct kernfs_open_file *of, > + char *buf, size_t nbytes, loff_t off) > +{ > + struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); > + unsigned long nr_pages; > + int ret; > + > + buf = strstrip(buf); > + ret = page_counter_memparse(buf, "-1", &nr_pages); > + if (ret) > + return ret; > + > + switch (MEMFILE_ATTR(of_cft(of)->private)) { > + case RES_LIMIT: > + if (mem_cgroup_is_root(memcg)) { /* Can't set limit on root */ > + ret = -EINVAL; > + break; > + } > + switch (MEMFILE_TYPE(of_cft(of)->private)) { > + case _MEM: > + ret = mem_cgroup_resize_max(memcg, nr_pages, false); > + break; > + case _MEMSWAP: > + ret = mem_cgroup_resize_max(memcg, nr_pages, true); > + break; > + case _KMEM: > + pr_warn_once("kmem.limit_in_bytes is deprecated and will be removed. " > + "Writing any value to this file has no effect. " > + "Please report your usecase to linux-mm@xxxxxxxxx if you " > + "depend on this functionality.\n"); > + ret = 0; > + break; > + case _TCP: > + ret = memcg_update_tcp_max(memcg, nr_pages); > + break; > + } > + break; > + case RES_SOFT_LIMIT: > + if (IS_ENABLED(CONFIG_PREEMPT_RT)) { > + ret = -EOPNOTSUPP; > + } else { > + WRITE_ONCE(memcg->soft_limit, nr_pages); > + ret = 0; > + } > + break; > + } > + return ret ?: nbytes; > +} > + > +static ssize_t mem_cgroup_reset(struct kernfs_open_file *of, char *buf, > + size_t nbytes, loff_t off) > +{ > + struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); > + struct page_counter *counter; > + > + switch (MEMFILE_TYPE(of_cft(of)->private)) { > + case _MEM: > + counter = &memcg->memory; > + break; > + case _MEMSWAP: > + counter = &memcg->memsw; > + break; > + case _KMEM: > + counter = &memcg->kmem; > + break; > + case _TCP: > + counter = &memcg->tcpmem; > + break; > + default: > + BUG(); > + } > + > + switch (MEMFILE_ATTR(of_cft(of)->private)) { > + case RES_MAX_USAGE: > + page_counter_reset_watermark(counter); > + break; > + case RES_FAILCNT: > + counter->failcnt = 0; > + break; > + default: > + BUG(); > + } > + > + return nbytes; > +} > + > +#ifdef CONFIG_NUMA > + > +#define LRU_ALL_FILE (BIT(LRU_INACTIVE_FILE) | BIT(LRU_ACTIVE_FILE)) > +#define LRU_ALL_ANON (BIT(LRU_INACTIVE_ANON) | BIT(LRU_ACTIVE_ANON)) > +#define LRU_ALL ((1 << NR_LRU_LISTS) - 1) > + > +/* static unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg, */ > +/* int nid, unsigned int lru_mask, bool tree) */ > +/* { */ > +/* struct lruvec *lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid)); */ > +/* unsigned long nr = 0; */ > +/* enum lru_list lru; */ > + > +/* VM_BUG_ON((unsigned)nid >= nr_node_ids); */ > + > +/* for_each_lru(lru) { */ > +/* if (!(BIT(lru) & lru_mask)) */ > +/* continue; */ > +/* if (tree) */ > +/* nr += lruvec_page_state(lruvec, NR_LRU_BASE + lru); */ > +/* else */ > +/* nr += lruvec_page_state_local(lruvec, NR_LRU_BASE + lru); */ > +/* } */ > +/* return nr; */ > +/* } */ > + > +/* static unsigned long mem_cgroup_nr_lru_pages(struct mem_cgroup *memcg, */ > +/* unsigned int lru_mask, */ > +/* bool tree) */ > +/* { */ > +/* unsigned long nr = 0; */ > +/* enum lru_list lru; */ > + > +/* for_each_lru(lru) { */ > +/* if (!(BIT(lru) & lru_mask)) */ > +/* continue; */ > +/* if (tree) */ > +/* nr += memcg_page_state(memcg, NR_LRU_BASE + lru); */ > +/* else */ > +/* nr += memcg_page_state_local(memcg, NR_LRU_BASE + lru); */ > +/* } */ > +/* return nr; */ > +/* } */ > + > +static int memcg_numa_stat_show(struct seq_file *m, void *v) > +{ > + struct numa_stat { > + const char *name; > + unsigned int lru_mask; > + }; > + > + static const struct numa_stat stats[] = { > + { "total", LRU_ALL }, > + { "file", LRU_ALL_FILE }, > + { "anon", LRU_ALL_ANON }, > + { "unevictable", BIT(LRU_UNEVICTABLE) }, > + }; > + const struct numa_stat *stat; > + int nid; > + struct mem_cgroup *memcg = mem_cgroup_from_seq(m); > + > + mem_cgroup_flush_stats(memcg); > + > + for (stat = stats; stat < stats + ARRAY_SIZE(stats); stat++) { > + seq_printf(m, "%s=%lu", stat->name, > + mem_cgroup_nr_lru_pages(memcg, stat->lru_mask, > + false)); > + for_each_node_state(nid, N_MEMORY) > + seq_printf(m, " N%d=%lu", nid, > + mem_cgroup_node_nr_lru_pages(memcg, nid, > + stat->lru_mask, false)); > + seq_putc(m, '\n'); > + } > + > + for (stat = stats; stat < stats + ARRAY_SIZE(stats); stat++) { > + > + seq_printf(m, "hierarchical_%s=%lu", stat->name, > + mem_cgroup_nr_lru_pages(memcg, stat->lru_mask, > + true)); > + for_each_node_state(nid, N_MEMORY) > + seq_printf(m, " N%d=%lu", nid, > + mem_cgroup_node_nr_lru_pages(memcg, nid, > + stat->lru_mask, true)); > + seq_putc(m, '\n'); > + } > + > + return 0; > +} > +#endif /* CONFIG_NUMA */ > + > +static const unsigned int memcg1_stats[] = { > + NR_FILE_PAGES, > + NR_ANON_MAPPED, > +#ifdef CONFIG_TRANSPARENT_HUGEPAGE > + NR_ANON_THPS, > +#endif > + NR_SHMEM, > + NR_FILE_MAPPED, > + NR_FILE_DIRTY, > + NR_WRITEBACK, > + WORKINGSET_REFAULT_ANON, > + WORKINGSET_REFAULT_FILE, > +#ifdef CONFIG_SWAP > + MEMCG_SWAP, > + NR_SWAPCACHE, > +#endif > +}; > + > +static const char *const memcg1_stat_names[] = { > + "cache", > + "rss", > +#ifdef CONFIG_TRANSPARENT_HUGEPAGE > + "rss_huge", > +#endif > + "shmem", > + "mapped_file", > + "dirty", > + "writeback", > + "workingset_refault_anon", > + "workingset_refault_file", > +#ifdef CONFIG_SWAP > + "swap", > + "swapcached", > +#endif > +}; > + > +/* Universal VM events cgroup1 shows, original sort order */ > +static const unsigned int memcg1_events[] = { > + PGPGIN, > + PGPGOUT, > + PGFAULT, > + PGMAJFAULT, > +}; > + > +void memcg1_stat_format(struct mem_cgroup *memcg, struct seq_buf *s) > +{ > + unsigned long memory, memsw; > + struct mem_cgroup *mi; > + unsigned int i; > + > + BUILD_BUG_ON(ARRAY_SIZE(memcg1_stat_names) != ARRAY_SIZE(memcg1_stats)); > + > + mem_cgroup_flush_stats(memcg); > + > + for (i = 0; i < ARRAY_SIZE(memcg1_stats); i++) { > + unsigned long nr; > + > + nr = memcg_page_state_local_output(memcg, memcg1_stats[i]); > + seq_buf_printf(s, "%s %lu\n", memcg1_stat_names[i], nr); > + } > + > + for (i = 0; i < ARRAY_SIZE(memcg1_events); i++) > + seq_buf_printf(s, "%s %lu\n", vm_event_name(memcg1_events[i]), > + memcg_events_local(memcg, memcg1_events[i])); > + > + for (i = 0; i < NR_LRU_LISTS; i++) > + seq_buf_printf(s, "%s %lu\n", lru_list_name(i), > + memcg_page_state_local(memcg, NR_LRU_BASE + i) * > + PAGE_SIZE); > + > + /* Hierarchical information */ > + memory = memsw = PAGE_COUNTER_MAX; > + for (mi = memcg; mi; mi = parent_mem_cgroup(mi)) { > + memory = min(memory, READ_ONCE(mi->memory.max)); > + memsw = min(memsw, READ_ONCE(mi->memsw.max)); > + } > + seq_buf_printf(s, "hierarchical_memory_limit %llu\n", > + (u64)memory * PAGE_SIZE); > + seq_buf_printf(s, "hierarchical_memsw_limit %llu\n", > + (u64)memsw * PAGE_SIZE); > + > + for (i = 0; i < ARRAY_SIZE(memcg1_stats); i++) { > + unsigned long nr; > + > + nr = memcg_page_state_output(memcg, memcg1_stats[i]); > + seq_buf_printf(s, "total_%s %llu\n", memcg1_stat_names[i], > + (u64)nr); > + } > + > + for (i = 0; i < ARRAY_SIZE(memcg1_events); i++) > + seq_buf_printf(s, "total_%s %llu\n", > + vm_event_name(memcg1_events[i]), > + (u64)memcg_events(memcg, memcg1_events[i])); > + > + for (i = 0; i < NR_LRU_LISTS; i++) > + seq_buf_printf(s, "total_%s %llu\n", lru_list_name(i), > + (u64)memcg_page_state(memcg, NR_LRU_BASE + i) * > + PAGE_SIZE); > + > +#ifdef CONFIG_DEBUG_VM > + { > + pg_data_t *pgdat; > + struct mem_cgroup_per_node *mz; > + unsigned long anon_cost = 0; > + unsigned long file_cost = 0; > + > + for_each_online_pgdat(pgdat) { > + mz = memcg->nodeinfo[pgdat->node_id]; > + > + anon_cost += mz->lruvec.anon_cost; > + file_cost += mz->lruvec.file_cost; > + } > + seq_buf_printf(s, "anon_cost %lu\n", anon_cost); > + seq_buf_printf(s, "file_cost %lu\n", file_cost); > + } > +#endif > +} > + > +static u64 mem_cgroup_swappiness_read(struct cgroup_subsys_state *css, > + struct cftype *cft) > +{ > + struct mem_cgroup *memcg = mem_cgroup_from_css(css); > + > + return mem_cgroup_swappiness(memcg); > +} > + > +static int mem_cgroup_swappiness_write(struct cgroup_subsys_state *css, > + struct cftype *cft, u64 val) > +{ > + struct mem_cgroup *memcg = mem_cgroup_from_css(css); > + > + if (val > MAX_SWAPPINESS) > + return -EINVAL; > + > + if (!mem_cgroup_is_root(memcg)) > + WRITE_ONCE(memcg->swappiness, val); > + else > + WRITE_ONCE(vm_swappiness, val); > + > + return 0; > +} > + > +static int mem_cgroup_oom_control_read(struct seq_file *sf, void *v) > +{ > + struct mem_cgroup *memcg = mem_cgroup_from_seq(sf); > + > + seq_printf(sf, "oom_kill_disable %d\n", READ_ONCE(memcg->oom_kill_disable)); > + seq_printf(sf, "under_oom %d\n", (bool)memcg->under_oom); > + seq_printf(sf, "oom_kill %lu\n", > + atomic_long_read(&memcg->memory_events[MEMCG_OOM_KILL])); > + return 0; > +} > + > +static int mem_cgroup_oom_control_write(struct cgroup_subsys_state *css, > + struct cftype *cft, u64 val) > +{ > + struct mem_cgroup *memcg = mem_cgroup_from_css(css); > + > + /* cannot set to root cgroup and only 0 and 1 are allowed */ > + if (mem_cgroup_is_root(memcg) || !((val == 0) || (val == 1))) > + return -EINVAL; > + > + WRITE_ONCE(memcg->oom_kill_disable, val); > + if (!val) > + memcg1_oom_recover(memcg); > + > + return 0; > +} > + > +#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_SLUB_DEBUG) > +static int mem_cgroup_slab_show(struct seq_file *m, void *p) > +{ > + /* > + * Deprecated. > + * Please, take a look at tools/cgroup/memcg_slabinfo.py . > + */ > + return 0; > +} > +#endif > + > +struct cftype mem_cgroup_legacy_files[] = { > + { > + .name = "usage_in_bytes", > + .private = MEMFILE_PRIVATE(_MEM, RES_USAGE), > + .read_u64 = mem_cgroup_read_u64, > + }, > + { > + .name = "max_usage_in_bytes", > + .private = MEMFILE_PRIVATE(_MEM, RES_MAX_USAGE), > + .write = mem_cgroup_reset, > + .read_u64 = mem_cgroup_read_u64, > + }, > + { > + .name = "limit_in_bytes", > + .private = MEMFILE_PRIVATE(_MEM, RES_LIMIT), > + .write = mem_cgroup_write, > + .read_u64 = mem_cgroup_read_u64, > + }, > + { > + .name = "soft_limit_in_bytes", > + .private = MEMFILE_PRIVATE(_MEM, RES_SOFT_LIMIT), > + .write = mem_cgroup_write, > + .read_u64 = mem_cgroup_read_u64, > + }, > + { > + .name = "failcnt", > + .private = MEMFILE_PRIVATE(_MEM, RES_FAILCNT), > + .write = mem_cgroup_reset, > + .read_u64 = mem_cgroup_read_u64, > + }, > + { > + .name = "stat", > + .seq_show = memory_stat_show, > + }, > + { > + .name = "force_empty", > + .write = mem_cgroup_force_empty_write, > + }, > + { > + .name = "use_hierarchy", > + .write_u64 = mem_cgroup_hierarchy_write, > + .read_u64 = mem_cgroup_hierarchy_read, > + }, > + { > + .name = "cgroup.event_control", /* XXX: for compat */ > + .write = memcg_write_event_control, > + .flags = CFTYPE_NO_PREFIX | CFTYPE_WORLD_WRITABLE, > + }, > + { > + .name = "swappiness", > + .read_u64 = mem_cgroup_swappiness_read, > + .write_u64 = mem_cgroup_swappiness_write, > + }, > + { > + .name = "move_charge_at_immigrate", > + .read_u64 = mem_cgroup_move_charge_read, > + .write_u64 = mem_cgroup_move_charge_write, > + }, > + { > + .name = "oom_control", > + .seq_show = mem_cgroup_oom_control_read, > + .write_u64 = mem_cgroup_oom_control_write, > + }, > + { > + .name = "pressure_level", > + .seq_show = mem_cgroup_dummy_seq_show, > + }, > +#ifdef CONFIG_NUMA > + { > + .name = "numa_stat", > + .seq_show = memcg_numa_stat_show, > + }, > +#endif > + { > + .name = "kmem.limit_in_bytes", > + .private = MEMFILE_PRIVATE(_KMEM, RES_LIMIT), > + .write = mem_cgroup_write, > + .read_u64 = mem_cgroup_read_u64, > + }, > + { > + .name = "kmem.usage_in_bytes", > + .private = MEMFILE_PRIVATE(_KMEM, RES_USAGE), > + .read_u64 = mem_cgroup_read_u64, > + }, > + { > + .name = "kmem.failcnt", > + .private = MEMFILE_PRIVATE(_KMEM, RES_FAILCNT), > + .write = mem_cgroup_reset, > + .read_u64 = mem_cgroup_read_u64, > + }, > + { > + .name = "kmem.max_usage_in_bytes", > + .private = MEMFILE_PRIVATE(_KMEM, RES_MAX_USAGE), > + .write = mem_cgroup_reset, > + .read_u64 = mem_cgroup_read_u64, > + }, > +#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_SLUB_DEBUG) > + { > + .name = "kmem.slabinfo", > + .seq_show = mem_cgroup_slab_show, > + }, > +#endif > + { > + .name = "kmem.tcp.limit_in_bytes", > + .private = MEMFILE_PRIVATE(_TCP, RES_LIMIT), > + .write = mem_cgroup_write, > + .read_u64 = mem_cgroup_read_u64, > + }, > + { > + .name = "kmem.tcp.usage_in_bytes", > + .private = MEMFILE_PRIVATE(_TCP, RES_USAGE), > + .read_u64 = mem_cgroup_read_u64, > + }, > + { > + .name = "kmem.tcp.failcnt", > + .private = MEMFILE_PRIVATE(_TCP, RES_FAILCNT), > + .write = mem_cgroup_reset, > + .read_u64 = mem_cgroup_read_u64, > + }, > + { > + .name = "kmem.tcp.max_usage_in_bytes", > + .private = MEMFILE_PRIVATE(_TCP, RES_MAX_USAGE), > + .write = mem_cgroup_reset, > + .read_u64 = mem_cgroup_read_u64, > + }, > + { }, /* terminate */ > +}; > + > +struct cftype memsw_files[] = { > + { > + .name = "memsw.usage_in_bytes", > + .private = MEMFILE_PRIVATE(_MEMSWAP, RES_USAGE), > + .read_u64 = mem_cgroup_read_u64, > + }, > + { > + .name = "memsw.max_usage_in_bytes", > + .private = MEMFILE_PRIVATE(_MEMSWAP, RES_MAX_USAGE), > + .write = mem_cgroup_reset, > + .read_u64 = mem_cgroup_read_u64, > + }, > + { > + .name = "memsw.limit_in_bytes", > + .private = MEMFILE_PRIVATE(_MEMSWAP, RES_LIMIT), > + .write = mem_cgroup_write, > + .read_u64 = mem_cgroup_read_u64, > + }, > + { > + .name = "memsw.failcnt", > + .private = MEMFILE_PRIVATE(_MEMSWAP, RES_FAILCNT), > + .write = mem_cgroup_reset, > + .read_u64 = mem_cgroup_read_u64, > + }, > + { }, /* terminate */ > +}; > + > static int __init memcg1_init(void) > { > int node; > diff --git a/mm/memcontrol-v1.h b/mm/memcontrol-v1.h > index 972c493a8ae3..7be4670d9abb 100644 > --- a/mm/memcontrol-v1.h > +++ b/mm/memcontrol-v1.h > @@ -3,6 +3,8 @@ > #ifndef __MM_MEMCONTROL_V1_H > #define __MM_MEMCONTROL_V1_H > > +#include <linux/cgroup-defs.h> > + > void memcg1_update_tree(struct mem_cgroup *memcg, int nid); > void memcg1_remove_from_trees(struct mem_cgroup *memcg); > > @@ -34,12 +36,6 @@ int memcg1_can_attach(struct cgroup_taskset *tset); > void memcg1_cancel_attach(struct cgroup_taskset *tset); > void memcg1_move_task(void); > > -struct cftype; > -u64 mem_cgroup_move_charge_read(struct cgroup_subsys_state *css, > - struct cftype *cft); > -int mem_cgroup_move_charge_write(struct cgroup_subsys_state *css, > - struct cftype *cft, u64 val); > - > /* > * Per memcg event counter is incremented at every pagein/pageout. With THP, > * it will be incremented by the number of pages. This counter is used > @@ -86,11 +82,28 @@ enum res_type { > bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg, > enum mem_cgroup_events_target target); > unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap); > -ssize_t memcg_write_event_control(struct kernfs_open_file *of, > - char *buf, size_t nbytes, loff_t off); > > bool memcg1_oom_prepare(struct mem_cgroup *memcg, bool *locked); > void memcg1_oom_finish(struct mem_cgroup *memcg, bool locked); > void memcg1_oom_recover(struct mem_cgroup *memcg); > > +void drain_all_stock(struct mem_cgroup *root_memcg); > +unsigned long mem_cgroup_nr_lru_pages(struct mem_cgroup *memcg, > + unsigned int lru_mask, bool tree); > +unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg, > + int nid, unsigned int lru_mask, > + bool tree); > + > +unsigned long memcg_events(struct mem_cgroup *memcg, int event); > +unsigned long memcg_events_local(struct mem_cgroup *memcg, int event); > +unsigned long memcg_page_state_local(struct mem_cgroup *memcg, int idx); > +unsigned long memcg_page_state_output(struct mem_cgroup *memcg, int item); > +unsigned long memcg_page_state_local_output(struct mem_cgroup *memcg, int item); > +int memory_stat_show(struct seq_file *m, void *v); > + > +void memcg1_stat_format(struct mem_cgroup *memcg, struct seq_buf *s); > + > +extern struct cftype memsw_files[]; > +extern struct cftype mem_cgroup_legacy_files[]; > + > #endif /* __MM_MEMCONTROL_V1_H */ > diff --git a/mm/memcontrol.c b/mm/memcontrol.c > index 37e0af5b26f3..c7341e811945 100644 > --- a/mm/memcontrol.c > +++ b/mm/memcontrol.c > @@ -96,10 +96,6 @@ static DECLARE_WAIT_QUEUE_HEAD(memcg_cgwb_frn_waitq); > #define THRESHOLDS_EVENTS_TARGET 128 > #define SOFTLIMIT_EVENTS_TARGET 1024 > > -#define MEMFILE_PRIVATE(x, val) ((x) << 16 | (val)) > -#define MEMFILE_TYPE(val) ((val) >> 16 & 0xffff) > -#define MEMFILE_ATTR(val) ((val) & 0xffff) > - > static inline bool task_is_dying(void) > { > return tsk_is_oom_victim(current) || fatal_signal_pending(current) || > @@ -676,7 +672,7 @@ void __mod_memcg_state(struct mem_cgroup *memcg, enum memcg_stat_item idx, > } > > /* idx can be of type enum memcg_stat_item or node_stat_item. */ > -static unsigned long memcg_page_state_local(struct mem_cgroup *memcg, int idx) > +unsigned long memcg_page_state_local(struct mem_cgroup *memcg, int idx) > { > long x; > int i = memcg_stats_index(idx); > @@ -825,7 +821,7 @@ void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx, > memcg_stats_unlock(); > } > > -static unsigned long memcg_events(struct mem_cgroup *memcg, int event) > +unsigned long memcg_events(struct mem_cgroup *memcg, int event) > { > int i = memcg_events_index(event); > > @@ -835,7 +831,7 @@ static unsigned long memcg_events(struct mem_cgroup *memcg, int event) > return READ_ONCE(memcg->vmstats->events[i]); > } > > -static unsigned long memcg_events_local(struct mem_cgroup *memcg, int event) > +unsigned long memcg_events_local(struct mem_cgroup *memcg, int event) > { > int i = memcg_events_index(event); > > @@ -1420,15 +1416,13 @@ static int memcg_page_state_output_unit(int item) > } > } > > -static inline unsigned long memcg_page_state_output(struct mem_cgroup *memcg, > - int item) > +unsigned long memcg_page_state_output(struct mem_cgroup *memcg, int item) > { > return memcg_page_state(memcg, item) * > memcg_page_state_output_unit(item); > } > > -static inline unsigned long memcg_page_state_local_output( > - struct mem_cgroup *memcg, int item) > +unsigned long memcg_page_state_local_output(struct mem_cgroup *memcg, int item) > { > return memcg_page_state_local(memcg, item) * > memcg_page_state_output_unit(item); > @@ -1487,8 +1481,6 @@ static void memcg_stat_format(struct mem_cgroup *memcg, struct seq_buf *s) > WARN_ON_ONCE(seq_buf_has_overflowed(s)); > } > > -static void memcg1_stat_format(struct mem_cgroup *memcg, struct seq_buf *s); > - > static void memory_stat_format(struct mem_cgroup *memcg, struct seq_buf *s) > { > if (cgroup_subsys_on_dfl(memory_cgrp_subsys)) > @@ -1861,7 +1853,7 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages) > * Drains all per-CPU charge caches for given root_memcg resp. subtree > * of the hierarchy under it. > */ > -static void drain_all_stock(struct mem_cgroup *root_memcg) > +void drain_all_stock(struct mem_cgroup *root_memcg) > { > int cpu, curcpu; > > @@ -3115,120 +3107,6 @@ void split_page_memcg(struct page *head, int old_order, int new_order) > css_get_many(&memcg->css, old_nr / new_nr - 1); > } > > - > -static DEFINE_MUTEX(memcg_max_mutex); > - > -static int mem_cgroup_resize_max(struct mem_cgroup *memcg, > - unsigned long max, bool memsw) > -{ > - bool enlarge = false; > - bool drained = false; > - int ret; > - bool limits_invariant; > - struct page_counter *counter = memsw ? &memcg->memsw : &memcg->memory; > - > - do { > - if (signal_pending(current)) { > - ret = -EINTR; > - break; > - } > - > - mutex_lock(&memcg_max_mutex); > - /* > - * Make sure that the new limit (memsw or memory limit) doesn't > - * break our basic invariant rule memory.max <= memsw.max. > - */ > - limits_invariant = memsw ? max >= READ_ONCE(memcg->memory.max) : > - max <= memcg->memsw.max; > - if (!limits_invariant) { > - mutex_unlock(&memcg_max_mutex); > - ret = -EINVAL; > - break; > - } > - if (max > counter->max) > - enlarge = true; > - ret = page_counter_set_max(counter, max); > - mutex_unlock(&memcg_max_mutex); > - > - if (!ret) > - break; > - > - if (!drained) { > - drain_all_stock(memcg); > - drained = true; > - continue; > - } > - > - if (!try_to_free_mem_cgroup_pages(memcg, 1, GFP_KERNEL, > - memsw ? 0 : MEMCG_RECLAIM_MAY_SWAP, NULL)) { > - ret = -EBUSY; > - break; > - } > - } while (true); > - > - if (!ret && enlarge) > - memcg1_oom_recover(memcg); > - > - return ret; > -} > - > -/* > - * Reclaims as many pages from the given memcg as possible. > - * > - * Caller is responsible for holding css reference for memcg. > - */ > -static int mem_cgroup_force_empty(struct mem_cgroup *memcg) > -{ > - int nr_retries = MAX_RECLAIM_RETRIES; > - > - /* we call try-to-free pages for make this cgroup empty */ > - lru_add_drain_all(); > - > - drain_all_stock(memcg); > - > - /* try to free all pages in this cgroup */ > - while (nr_retries && page_counter_read(&memcg->memory)) { > - if (signal_pending(current)) > - return -EINTR; > - > - if (!try_to_free_mem_cgroup_pages(memcg, 1, GFP_KERNEL, > - MEMCG_RECLAIM_MAY_SWAP, NULL)) > - nr_retries--; > - } > - > - return 0; > -} > - > -static ssize_t mem_cgroup_force_empty_write(struct kernfs_open_file *of, > - char *buf, size_t nbytes, > - loff_t off) > -{ > - struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); > - > - if (mem_cgroup_is_root(memcg)) > - return -EINVAL; > - return mem_cgroup_force_empty(memcg) ?: nbytes; > -} > - > -static u64 mem_cgroup_hierarchy_read(struct cgroup_subsys_state *css, > - struct cftype *cft) > -{ > - return 1; > -} > - > -static int mem_cgroup_hierarchy_write(struct cgroup_subsys_state *css, > - struct cftype *cft, u64 val) > -{ > - if (val == 1) > - return 0; > - > - pr_warn_once("Non-hierarchical mode is deprecated. " > - "Please report your usecase to linux-mm@xxxxxxxxx if you " > - "depend on this functionality.\n"); > - > - return -EINVAL; > -} > - > unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap) > { > unsigned long val; > @@ -3251,67 +3129,6 @@ unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap) > return val; > } > > -enum { > - RES_USAGE, > - RES_LIMIT, > - RES_MAX_USAGE, > - RES_FAILCNT, > - RES_SOFT_LIMIT, > -}; > - > -static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css, > - struct cftype *cft) > -{ > - struct mem_cgroup *memcg = mem_cgroup_from_css(css); > - struct page_counter *counter; > - > - switch (MEMFILE_TYPE(cft->private)) { > - case _MEM: > - counter = &memcg->memory; > - break; > - case _MEMSWAP: > - counter = &memcg->memsw; > - break; > - case _KMEM: > - counter = &memcg->kmem; > - break; > - case _TCP: > - counter = &memcg->tcpmem; > - break; > - default: > - BUG(); > - } > - > - switch (MEMFILE_ATTR(cft->private)) { > - case RES_USAGE: > - if (counter == &memcg->memory) > - return (u64)mem_cgroup_usage(memcg, false) * PAGE_SIZE; > - if (counter == &memcg->memsw) > - return (u64)mem_cgroup_usage(memcg, true) * PAGE_SIZE; > - return (u64)page_counter_read(counter) * PAGE_SIZE; > - case RES_LIMIT: > - return (u64)counter->max * PAGE_SIZE; > - case RES_MAX_USAGE: > - return (u64)counter->watermark * PAGE_SIZE; > - case RES_FAILCNT: > - return counter->failcnt; > - case RES_SOFT_LIMIT: > - return (u64)READ_ONCE(memcg->soft_limit) * PAGE_SIZE; > - default: > - BUG(); > - } > -} > - > -/* > - * This function doesn't do anything useful. Its only job is to provide a read > - * handler for a file so that cgroup_file_mode() will add read permissions. > - */ > -static int mem_cgroup_dummy_seq_show(__always_unused struct seq_file *m, > - __always_unused void *v) > -{ > - return -EINVAL; > -} > - > #ifdef CONFIG_MEMCG_KMEM > static int memcg_online_kmem(struct mem_cgroup *memcg) > { > @@ -3373,139 +3190,9 @@ static void memcg_offline_kmem(struct mem_cgroup *memcg) > } > #endif /* CONFIG_MEMCG_KMEM */ > > -static int memcg_update_tcp_max(struct mem_cgroup *memcg, unsigned long max) > -{ > - int ret; > - > - mutex_lock(&memcg_max_mutex); > - > - ret = page_counter_set_max(&memcg->tcpmem, max); > - if (ret) > - goto out; > - > - if (!memcg->tcpmem_active) { > - /* > - * The active flag needs to be written after the static_key > - * update. This is what guarantees that the socket activation > - * function is the last one to run. See mem_cgroup_sk_alloc() > - * for details, and note that we don't mark any socket as > - * belonging to this memcg until that flag is up. > - * > - * We need to do this, because static_keys will span multiple > - * sites, but we can't control their order. If we mark a socket > - * as accounted, but the accounting functions are not patched in > - * yet, we'll lose accounting. > - * > - * We never race with the readers in mem_cgroup_sk_alloc(), > - * because when this value change, the code to process it is not > - * patched in yet. > - */ > - static_branch_inc(&memcg_sockets_enabled_key); > - memcg->tcpmem_active = true; > - } > -out: > - mutex_unlock(&memcg_max_mutex); > - return ret; > -} > - > -/* > - * The user of this function is... > - * RES_LIMIT. > - */ > -static ssize_t mem_cgroup_write(struct kernfs_open_file *of, > - char *buf, size_t nbytes, loff_t off) > -{ > - struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); > - unsigned long nr_pages; > - int ret; > - > - buf = strstrip(buf); > - ret = page_counter_memparse(buf, "-1", &nr_pages); > - if (ret) > - return ret; > - > - switch (MEMFILE_ATTR(of_cft(of)->private)) { > - case RES_LIMIT: > - if (mem_cgroup_is_root(memcg)) { /* Can't set limit on root */ > - ret = -EINVAL; > - break; > - } > - switch (MEMFILE_TYPE(of_cft(of)->private)) { > - case _MEM: > - ret = mem_cgroup_resize_max(memcg, nr_pages, false); > - break; > - case _MEMSWAP: > - ret = mem_cgroup_resize_max(memcg, nr_pages, true); > - break; > - case _KMEM: > - pr_warn_once("kmem.limit_in_bytes is deprecated and will be removed. " > - "Writing any value to this file has no effect. " > - "Please report your usecase to linux-mm@xxxxxxxxx if you " > - "depend on this functionality.\n"); > - ret = 0; > - break; > - case _TCP: > - ret = memcg_update_tcp_max(memcg, nr_pages); > - break; > - } > - break; > - case RES_SOFT_LIMIT: > - if (IS_ENABLED(CONFIG_PREEMPT_RT)) { > - ret = -EOPNOTSUPP; > - } else { > - WRITE_ONCE(memcg->soft_limit, nr_pages); > - ret = 0; > - } > - break; > - } > - return ret ?: nbytes; > -} > - > -static ssize_t mem_cgroup_reset(struct kernfs_open_file *of, char *buf, > - size_t nbytes, loff_t off) > -{ > - struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); > - struct page_counter *counter; > - > - switch (MEMFILE_TYPE(of_cft(of)->private)) { > - case _MEM: > - counter = &memcg->memory; > - break; > - case _MEMSWAP: > - counter = &memcg->memsw; > - break; > - case _KMEM: > - counter = &memcg->kmem; > - break; > - case _TCP: > - counter = &memcg->tcpmem; > - break; > - default: > - BUG(); > - } > - > - switch (MEMFILE_ATTR(of_cft(of)->private)) { > - case RES_MAX_USAGE: > - page_counter_reset_watermark(counter); > - break; > - case RES_FAILCNT: > - counter->failcnt = 0; > - break; > - default: > - BUG(); > - } > - > - return nbytes; > -} > - > -#ifdef CONFIG_NUMA > - > -#define LRU_ALL_FILE (BIT(LRU_INACTIVE_FILE) | BIT(LRU_ACTIVE_FILE)) > -#define LRU_ALL_ANON (BIT(LRU_INACTIVE_ANON) | BIT(LRU_ACTIVE_ANON)) > -#define LRU_ALL ((1 << NR_LRU_LISTS) - 1) > - > -static unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg, > - int nid, unsigned int lru_mask, bool tree) > +unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg, > + int nid, unsigned int lru_mask, > + bool tree) > { > struct lruvec *lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid)); > unsigned long nr = 0; > @@ -3524,9 +3211,8 @@ static unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg, > return nr; > } > > -static unsigned long mem_cgroup_nr_lru_pages(struct mem_cgroup *memcg, > - unsigned int lru_mask, > - bool tree) > +unsigned long mem_cgroup_nr_lru_pages(struct mem_cgroup *memcg, > + unsigned int lru_mask, bool tree) > { > unsigned long nr = 0; > enum lru_list lru; > @@ -3542,221 +3228,6 @@ static unsigned long mem_cgroup_nr_lru_pages(struct mem_cgroup *memcg, > return nr; > } > > -static int memcg_numa_stat_show(struct seq_file *m, void *v) > -{ > - struct numa_stat { > - const char *name; > - unsigned int lru_mask; > - }; > - > - static const struct numa_stat stats[] = { > - { "total", LRU_ALL }, > - { "file", LRU_ALL_FILE }, > - { "anon", LRU_ALL_ANON }, > - { "unevictable", BIT(LRU_UNEVICTABLE) }, > - }; > - const struct numa_stat *stat; > - int nid; > - struct mem_cgroup *memcg = mem_cgroup_from_seq(m); > - > - mem_cgroup_flush_stats(memcg); > - > - for (stat = stats; stat < stats + ARRAY_SIZE(stats); stat++) { > - seq_printf(m, "%s=%lu", stat->name, > - mem_cgroup_nr_lru_pages(memcg, stat->lru_mask, > - false)); > - for_each_node_state(nid, N_MEMORY) > - seq_printf(m, " N%d=%lu", nid, > - mem_cgroup_node_nr_lru_pages(memcg, nid, > - stat->lru_mask, false)); > - seq_putc(m, '\n'); > - } > - > - for (stat = stats; stat < stats + ARRAY_SIZE(stats); stat++) { > - > - seq_printf(m, "hierarchical_%s=%lu", stat->name, > - mem_cgroup_nr_lru_pages(memcg, stat->lru_mask, > - true)); > - for_each_node_state(nid, N_MEMORY) > - seq_printf(m, " N%d=%lu", nid, > - mem_cgroup_node_nr_lru_pages(memcg, nid, > - stat->lru_mask, true)); > - seq_putc(m, '\n'); > - } > - > - return 0; > -} > -#endif /* CONFIG_NUMA */ > - > -static const unsigned int memcg1_stats[] = { > - NR_FILE_PAGES, > - NR_ANON_MAPPED, > -#ifdef CONFIG_TRANSPARENT_HUGEPAGE > - NR_ANON_THPS, > -#endif > - NR_SHMEM, > - NR_FILE_MAPPED, > - NR_FILE_DIRTY, > - NR_WRITEBACK, > - WORKINGSET_REFAULT_ANON, > - WORKINGSET_REFAULT_FILE, > -#ifdef CONFIG_SWAP > - MEMCG_SWAP, > - NR_SWAPCACHE, > -#endif > -}; > - > -static const char *const memcg1_stat_names[] = { > - "cache", > - "rss", > -#ifdef CONFIG_TRANSPARENT_HUGEPAGE > - "rss_huge", > -#endif > - "shmem", > - "mapped_file", > - "dirty", > - "writeback", > - "workingset_refault_anon", > - "workingset_refault_file", > -#ifdef CONFIG_SWAP > - "swap", > - "swapcached", > -#endif > -}; > - > -/* Universal VM events cgroup1 shows, original sort order */ > -static const unsigned int memcg1_events[] = { > - PGPGIN, > - PGPGOUT, > - PGFAULT, > - PGMAJFAULT, > -}; > - > -static void memcg1_stat_format(struct mem_cgroup *memcg, struct seq_buf *s) > -{ > - unsigned long memory, memsw; > - struct mem_cgroup *mi; > - unsigned int i; > - > - BUILD_BUG_ON(ARRAY_SIZE(memcg1_stat_names) != ARRAY_SIZE(memcg1_stats)); > - > - mem_cgroup_flush_stats(memcg); > - > - for (i = 0; i < ARRAY_SIZE(memcg1_stats); i++) { > - unsigned long nr; > - > - nr = memcg_page_state_local_output(memcg, memcg1_stats[i]); > - seq_buf_printf(s, "%s %lu\n", memcg1_stat_names[i], nr); > - } > - > - for (i = 0; i < ARRAY_SIZE(memcg1_events); i++) > - seq_buf_printf(s, "%s %lu\n", vm_event_name(memcg1_events[i]), > - memcg_events_local(memcg, memcg1_events[i])); > - > - for (i = 0; i < NR_LRU_LISTS; i++) > - seq_buf_printf(s, "%s %lu\n", lru_list_name(i), > - memcg_page_state_local(memcg, NR_LRU_BASE + i) * > - PAGE_SIZE); > - > - /* Hierarchical information */ > - memory = memsw = PAGE_COUNTER_MAX; > - for (mi = memcg; mi; mi = parent_mem_cgroup(mi)) { > - memory = min(memory, READ_ONCE(mi->memory.max)); > - memsw = min(memsw, READ_ONCE(mi->memsw.max)); > - } > - seq_buf_printf(s, "hierarchical_memory_limit %llu\n", > - (u64)memory * PAGE_SIZE); > - seq_buf_printf(s, "hierarchical_memsw_limit %llu\n", > - (u64)memsw * PAGE_SIZE); > - > - for (i = 0; i < ARRAY_SIZE(memcg1_stats); i++) { > - unsigned long nr; > - > - nr = memcg_page_state_output(memcg, memcg1_stats[i]); > - seq_buf_printf(s, "total_%s %llu\n", memcg1_stat_names[i], > - (u64)nr); > - } > - > - for (i = 0; i < ARRAY_SIZE(memcg1_events); i++) > - seq_buf_printf(s, "total_%s %llu\n", > - vm_event_name(memcg1_events[i]), > - (u64)memcg_events(memcg, memcg1_events[i])); > - > - for (i = 0; i < NR_LRU_LISTS; i++) > - seq_buf_printf(s, "total_%s %llu\n", lru_list_name(i), > - (u64)memcg_page_state(memcg, NR_LRU_BASE + i) * > - PAGE_SIZE); > - > -#ifdef CONFIG_DEBUG_VM > - { > - pg_data_t *pgdat; > - struct mem_cgroup_per_node *mz; > - unsigned long anon_cost = 0; > - unsigned long file_cost = 0; > - > - for_each_online_pgdat(pgdat) { > - mz = memcg->nodeinfo[pgdat->node_id]; > - > - anon_cost += mz->lruvec.anon_cost; > - file_cost += mz->lruvec.file_cost; > - } > - seq_buf_printf(s, "anon_cost %lu\n", anon_cost); > - seq_buf_printf(s, "file_cost %lu\n", file_cost); > - } > -#endif > -} > - > -static u64 mem_cgroup_swappiness_read(struct cgroup_subsys_state *css, > - struct cftype *cft) > -{ > - struct mem_cgroup *memcg = mem_cgroup_from_css(css); > - > - return mem_cgroup_swappiness(memcg); > -} > - > -static int mem_cgroup_swappiness_write(struct cgroup_subsys_state *css, > - struct cftype *cft, u64 val) > -{ > - struct mem_cgroup *memcg = mem_cgroup_from_css(css); > - > - if (val > MAX_SWAPPINESS) > - return -EINVAL; > - > - if (!mem_cgroup_is_root(memcg)) > - WRITE_ONCE(memcg->swappiness, val); > - else > - WRITE_ONCE(vm_swappiness, val); > - > - return 0; > -} > - > -static int mem_cgroup_oom_control_read(struct seq_file *sf, void *v) > -{ > - struct mem_cgroup *memcg = mem_cgroup_from_seq(sf); > - > - seq_printf(sf, "oom_kill_disable %d\n", READ_ONCE(memcg->oom_kill_disable)); > - seq_printf(sf, "under_oom %d\n", (bool)memcg->under_oom); > - seq_printf(sf, "oom_kill %lu\n", > - atomic_long_read(&memcg->memory_events[MEMCG_OOM_KILL])); > - return 0; > -} > - > -static int mem_cgroup_oom_control_write(struct cgroup_subsys_state *css, > - struct cftype *cft, u64 val) > -{ > - struct mem_cgroup *memcg = mem_cgroup_from_css(css); > - > - /* cannot set to root cgroup and only 0 and 1 are allowed */ > - if (mem_cgroup_is_root(memcg) || !((val == 0) || (val == 1))) > - return -EINVAL; > - > - WRITE_ONCE(memcg->oom_kill_disable, val); > - if (!val) > - memcg1_oom_recover(memcg); > - > - return 0; > -} > - > #ifdef CONFIG_CGROUP_WRITEBACK > > #include <trace/events/writeback.h> > @@ -3970,147 +3441,6 @@ static void memcg_wb_domain_size_changed(struct mem_cgroup *memcg) > > #endif /* CONFIG_CGROUP_WRITEBACK */ > > -#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_SLUB_DEBUG) > -static int mem_cgroup_slab_show(struct seq_file *m, void *p) > -{ > - /* > - * Deprecated. > - * Please, take a look at tools/cgroup/memcg_slabinfo.py . > - */ > - return 0; > -} > -#endif > - > -static int memory_stat_show(struct seq_file *m, void *v); > - > -static struct cftype mem_cgroup_legacy_files[] = { > - { > - .name = "usage_in_bytes", > - .private = MEMFILE_PRIVATE(_MEM, RES_USAGE), > - .read_u64 = mem_cgroup_read_u64, > - }, > - { > - .name = "max_usage_in_bytes", > - .private = MEMFILE_PRIVATE(_MEM, RES_MAX_USAGE), > - .write = mem_cgroup_reset, > - .read_u64 = mem_cgroup_read_u64, > - }, > - { > - .name = "limit_in_bytes", > - .private = MEMFILE_PRIVATE(_MEM, RES_LIMIT), > - .write = mem_cgroup_write, > - .read_u64 = mem_cgroup_read_u64, > - }, > - { > - .name = "soft_limit_in_bytes", > - .private = MEMFILE_PRIVATE(_MEM, RES_SOFT_LIMIT), > - .write = mem_cgroup_write, > - .read_u64 = mem_cgroup_read_u64, > - }, > - { > - .name = "failcnt", > - .private = MEMFILE_PRIVATE(_MEM, RES_FAILCNT), > - .write = mem_cgroup_reset, > - .read_u64 = mem_cgroup_read_u64, > - }, > - { > - .name = "stat", > - .seq_show = memory_stat_show, > - }, > - { > - .name = "force_empty", > - .write = mem_cgroup_force_empty_write, > - }, > - { > - .name = "use_hierarchy", > - .write_u64 = mem_cgroup_hierarchy_write, > - .read_u64 = mem_cgroup_hierarchy_read, > - }, > - { > - .name = "cgroup.event_control", /* XXX: for compat */ > - .write = memcg_write_event_control, > - .flags = CFTYPE_NO_PREFIX | CFTYPE_WORLD_WRITABLE, > - }, > - { > - .name = "swappiness", > - .read_u64 = mem_cgroup_swappiness_read, > - .write_u64 = mem_cgroup_swappiness_write, > - }, > - { > - .name = "move_charge_at_immigrate", > - .read_u64 = mem_cgroup_move_charge_read, > - .write_u64 = mem_cgroup_move_charge_write, > - }, > - { > - .name = "oom_control", > - .seq_show = mem_cgroup_oom_control_read, > - .write_u64 = mem_cgroup_oom_control_write, > - }, > - { > - .name = "pressure_level", > - .seq_show = mem_cgroup_dummy_seq_show, > - }, > -#ifdef CONFIG_NUMA > - { > - .name = "numa_stat", > - .seq_show = memcg_numa_stat_show, > - }, > -#endif > - { > - .name = "kmem.limit_in_bytes", > - .private = MEMFILE_PRIVATE(_KMEM, RES_LIMIT), > - .write = mem_cgroup_write, > - .read_u64 = mem_cgroup_read_u64, > - }, > - { > - .name = "kmem.usage_in_bytes", > - .private = MEMFILE_PRIVATE(_KMEM, RES_USAGE), > - .read_u64 = mem_cgroup_read_u64, > - }, > - { > - .name = "kmem.failcnt", > - .private = MEMFILE_PRIVATE(_KMEM, RES_FAILCNT), > - .write = mem_cgroup_reset, > - .read_u64 = mem_cgroup_read_u64, > - }, > - { > - .name = "kmem.max_usage_in_bytes", > - .private = MEMFILE_PRIVATE(_KMEM, RES_MAX_USAGE), > - .write = mem_cgroup_reset, > - .read_u64 = mem_cgroup_read_u64, > - }, > -#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_SLUB_DEBUG) > - { > - .name = "kmem.slabinfo", > - .seq_show = mem_cgroup_slab_show, > - }, > -#endif > - { > - .name = "kmem.tcp.limit_in_bytes", > - .private = MEMFILE_PRIVATE(_TCP, RES_LIMIT), > - .write = mem_cgroup_write, > - .read_u64 = mem_cgroup_read_u64, > - }, > - { > - .name = "kmem.tcp.usage_in_bytes", > - .private = MEMFILE_PRIVATE(_TCP, RES_USAGE), > - .read_u64 = mem_cgroup_read_u64, > - }, > - { > - .name = "kmem.tcp.failcnt", > - .private = MEMFILE_PRIVATE(_TCP, RES_FAILCNT), > - .write = mem_cgroup_reset, > - .read_u64 = mem_cgroup_read_u64, > - }, > - { > - .name = "kmem.tcp.max_usage_in_bytes", > - .private = MEMFILE_PRIVATE(_TCP, RES_MAX_USAGE), > - .write = mem_cgroup_reset, > - .read_u64 = mem_cgroup_read_u64, > - }, > - { }, /* terminate */ > -}; > - > /* > * Private memory cgroup IDR > * > @@ -4902,7 +4232,7 @@ static int memory_events_local_show(struct seq_file *m, void *v) > return 0; > } > > -static int memory_stat_show(struct seq_file *m, void *v) > +int memory_stat_show(struct seq_file *m, void *v) > { > struct mem_cgroup *memcg = mem_cgroup_from_seq(m); > char *buf = kmalloc(PAGE_SIZE, GFP_KERNEL); > @@ -6133,33 +5463,6 @@ static struct cftype swap_files[] = { > { } /* terminate */ > }; > > -static struct cftype memsw_files[] = { > - { > - .name = "memsw.usage_in_bytes", > - .private = MEMFILE_PRIVATE(_MEMSWAP, RES_USAGE), > - .read_u64 = mem_cgroup_read_u64, > - }, > - { > - .name = "memsw.max_usage_in_bytes", > - .private = MEMFILE_PRIVATE(_MEMSWAP, RES_MAX_USAGE), > - .write = mem_cgroup_reset, > - .read_u64 = mem_cgroup_read_u64, > - }, > - { > - .name = "memsw.limit_in_bytes", > - .private = MEMFILE_PRIVATE(_MEMSWAP, RES_LIMIT), > - .write = mem_cgroup_write, > - .read_u64 = mem_cgroup_read_u64, > - }, > - { > - .name = "memsw.failcnt", > - .private = MEMFILE_PRIVATE(_MEMSWAP, RES_FAILCNT), > - .write = mem_cgroup_reset, > - .read_u64 = mem_cgroup_read_u64, > - }, > - { }, /* terminate */ > -}; > - > #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_ZSWAP) > /** > * obj_cgroup_may_zswap - check if this cgroup can zswap > -- > 2.45.2 -- Michal Hocko SUSE Labs