Starting with 8f9ac36d2cbb (cgroup: distinguish the default and legacy hierarchies when handling cftypes) the memory cgroup controller doesn't export any knobs because all of them are marked as legacy. The idea is that only selected knobs are exported for the new cgroup API. This patch exports the core knobs for the memory controller. The following knobs are not and won't be available in the default (aka unified) hierarchy: - use_hierarchy - was one of the biggest mistakes when the memory controller was introduced. It allows for creating a hierarchical cgroup structure which doesn't have any hierarchical accounting. This leads to really strange configurations where other co-mounted controllers behave hierarchically while the memory controller doesn't. All controllers have to be hierarchical with the new cgroups API so this knob doesn't make any sense here. - force_empty - was introduced primarily to drop memory before it gets reparented on group removal. This alone doesn't sound fully justified because reparented pages which are not in use can also be reclaimed later when there is memory pressure on the parent level. Another use-case would be something like per-memcg /proc/sys/vm/drop_caches which doesn't sound like a great idea either. We are trying to get away from using it on the global level so we shouldn't allow that on the per-memcg level either. - soft_limit_in_bytes - was originally introduced to help recover from overcommit situations where the overall hard limits on the system are higher than the available memory. A group which has the largest excess on the soft limit is reclaimed to help reduce memory pressure during global memory pressure. The primary problem with this tunable is that every memcg is soft unlimited by default, which is the reverse of what would be expected from such a knob. Another problem is that the soft limit is considered only during global memory pressure rather than on external memory pressure in general (e.g. 
triggered by the limit hit on a parent up the hierarchy). There are other issues which are tied to the implementation (e.g. priority-0 reclaim used for the soft limit reclaim etc.) which are really hard to fix without breaking potential users. There will be a replacement for the soft limit in the unified hierarchy and users will be encouraged to switch their configuration to the new scheme. Until this is available users are advised to stay with the legacy cgroup API. The TCP kmem sub-controller is not exported at this stage because this one has seen basically no traction since it was merged and it is not entirely clear why the kmem controller cannot be used for the same purpose. Having 2 controllers for tracking kernel memory allocations sounds like too much. If there are use-cases and reasons for not merging it into kmem then we can reconsider and allow it for the new cgroups API later. Signed-off-by: Michal Hocko <mhocko@xxxxxxx> --- Documentation/cgroups/memory.txt | 19 ++++--- mm/memcontrol.c | 105 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 115 insertions(+), 9 deletions(-) diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt index 02ab997a1ed2..a8f01497c5de 100644 --- a/Documentation/cgroups/memory.txt +++ b/Documentation/cgroups/memory.txt @@ -62,10 +62,10 @@ Brief summary of control files. 
memory.memsw.failcnt # show the number of memory+Swap hits limits memory.max_usage_in_bytes # show max memory usage recorded memory.memsw.max_usage_in_bytes # show max memory+Swap usage recorded - memory.soft_limit_in_bytes # set/show soft limit of memory usage +[D] memory.soft_limit_in_bytes # set/show soft limit of memory usage memory.stat # show various statistics - memory.use_hierarchy # set/show hierarchical account enabled - memory.force_empty # trigger forced move charge to parent +[D] memory.use_hierarchy # set/show hierarchical account enabled +[D] memory.force_empty # trigger forced move charge to parent memory.pressure_level # set memory pressure notifications memory.swappiness # set/show swappiness parameter of vmscan (See sysctl's vm.swappiness) @@ -78,10 +78,15 @@ Brief summary of control files. memory.kmem.failcnt # show the number of kernel memory usage hits limits memory.kmem.max_usage_in_bytes # show max kernel memory usage recorded - memory.kmem.tcp.limit_in_bytes # set/show hard limit for tcp buf memory - memory.kmem.tcp.usage_in_bytes # show current tcp buf memory allocation - memory.kmem.tcp.failcnt # show the number of tcp buf memory usage hits limits - memory.kmem.tcp.max_usage_in_bytes # show max tcp buf memory usage recorded +[D] memory.kmem.tcp.limit_in_bytes # set/show hard limit for tcp buf memory +[D] memory.kmem.tcp.usage_in_bytes # show current tcp buf memory allocation +[D] memory.kmem.tcp.failcnt # show the number of tcp buf memory usage hits limits +[D] memory.kmem.tcp.max_usage_in_bytes # show max tcp buf memory usage recorded + +Knobs marked as [D] are considered deprecated and they won't be available in +the new cgroup Unified hierarchy API (see +Documentation/cgroups/unified-hierarchy.txt for more information). They are +still available with the legacy hierarchy though. 1. 
History diff --git a/mm/memcontrol.c b/mm/memcontrol.c index fa99a3e2e427..9ed40a045d27 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5226,7 +5226,11 @@ out_kfree: return ret; } -static struct cftype mem_cgroup_files[] = { +/* + * memcg knobs for the legacy cgroup API. No new files should be + * added here. + */ +static struct cftype legacy_mem_cgroup_files[] = { { .name = "usage_in_bytes", .private = MEMFILE_PRIVATE(_MEM, RES_USAGE), @@ -5334,6 +5338,100 @@ static struct cftype mem_cgroup_files[] = { { }, /* terminate */ }; +/* memcg knobs for new cgroups API (default aka unified hierarchy) */ +static struct cftype dfl_mem_cgroup_files[] = { + { + .name = "usage_in_bytes", + .private = MEMFILE_PRIVATE(_MEM, RES_USAGE), + .read_u64 = mem_cgroup_read_u64, + }, + { + .name = "max_usage_in_bytes", + .private = MEMFILE_PRIVATE(_MEM, RES_MAX_USAGE), + .write = mem_cgroup_reset, + .read_u64 = mem_cgroup_read_u64, + }, + { + .name = "limit_in_bytes", + .private = MEMFILE_PRIVATE(_MEM, RES_LIMIT), + .write = mem_cgroup_write, + .read_u64 = mem_cgroup_read_u64, + }, + { + .name = "failcnt", + .private = MEMFILE_PRIVATE(_MEM, RES_FAILCNT), + .write = mem_cgroup_reset, + .read_u64 = mem_cgroup_read_u64, + }, + { + .name = "stat", + .seq_show = memcg_stat_show, + }, + { + .name = "cgroup.event_control", /* XXX: for compat */ + .write = memcg_write_event_control, + .flags = CFTYPE_NO_PREFIX, + .mode = S_IWUGO, + }, + { + .name = "swappiness", + .read_u64 = mem_cgroup_swappiness_read, + .write_u64 = mem_cgroup_swappiness_write, + }, + { + .name = "move_charge_at_immigrate", + .read_u64 = mem_cgroup_move_charge_read, + .write_u64 = mem_cgroup_move_charge_write, + }, + { + .name = "oom_control", + .seq_show = mem_cgroup_oom_control_read, + .write_u64 = mem_cgroup_oom_control_write, + .private = MEMFILE_PRIVATE(_OOM_TYPE, OOM_CONTROL), + }, + { + .name = "pressure_level", + }, +#ifdef CONFIG_NUMA + { + .name = "numa_stat", + .seq_show = memcg_numa_stat_show, + }, +#endif 
+#ifdef CONFIG_MEMCG_KMEM + { + .name = "kmem.limit_in_bytes", + .private = MEMFILE_PRIVATE(_KMEM, RES_LIMIT), + .write = mem_cgroup_write, + .read_u64 = mem_cgroup_read_u64, + }, + { + .name = "kmem.max_usage_in_bytes", + .private = MEMFILE_PRIVATE(_KMEM, RES_MAX_USAGE), + .write = mem_cgroup_reset, + .read_u64 = mem_cgroup_read_u64, + }, + { + .name = "kmem.usage_in_bytes", + .private = MEMFILE_PRIVATE(_KMEM, RES_USAGE), + .read_u64 = mem_cgroup_read_u64, + }, + { + .name = "kmem.failcnt", + .private = MEMFILE_PRIVATE(_KMEM, RES_FAILCNT), + .write = mem_cgroup_reset, + .read_u64 = mem_cgroup_read_u64, + }, +#ifdef CONFIG_SLABINFO + { + .name = "kmem.slabinfo", + .seq_show = mem_cgroup_slabinfo_read, + }, +#endif +#endif + { }, /* terminate */ +}; + #ifdef CONFIG_MEMCG_SWAP static struct cftype memsw_cgroup_files[] = { { @@ -6266,7 +6364,8 @@ struct cgroup_subsys memory_cgrp_subsys = { .cancel_attach = mem_cgroup_cancel_attach, .attach = mem_cgroup_move_task, .bind = mem_cgroup_bind, - .legacy_cftypes = mem_cgroup_files, + .legacy_cftypes = legacy_mem_cgroup_files, + .dfl_cftypes = dfl_mem_cgroup_files, .early_init = 0, }; @@ -6285,6 +6384,8 @@ static void __init memsw_file_init(void) { WARN_ON(cgroup_add_legacy_cftypes(&memory_cgrp_subsys, memsw_cgroup_files)); + WARN_ON(cgroup_add_dfl_cftypes(&memory_cgrp_subsys, + memsw_cgroup_files)); } static void __init enable_swap_cgroup(void) -- 2.0.1 -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxx. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@xxxxxxxxx"> email@xxxxxxxxx </a>