[PATCH RFC 1/5] vm_cgroup: basic infrastructure

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This patch introduces the vm cgroup to control address space expansion
of tasks that belong to a cgroup. The idea is to provide a mechanism to
limit memory overcommit not only for the whole system, but also on a
per-cgroup basis.

This patch only adds some basic cgroup methods, like alloc/free and
write/read, while the real accounting/limiting is done in the following
patches.

Signed-off-by: Vladimir Davydov <vdavydov@xxxxxxxxxxxxx>
---
 include/linux/cgroup_subsys.h |    4 ++
 include/linux/vm_cgroup.h     |   18 ++++++
 init/Kconfig                  |    4 ++
 mm/Makefile                   |    1 +
 mm/vm_cgroup.c                |  131 +++++++++++++++++++++++++++++++++++++++++
 5 files changed, 158 insertions(+)
 create mode 100644 include/linux/vm_cgroup.h
 create mode 100644 mm/vm_cgroup.c

diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index 98c4f9b12b03..8eb7db12f6ea 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -47,6 +47,10 @@ SUBSYS(net_prio)
 SUBSYS(hugetlb)
 #endif
 
+#if IS_ENABLED(CONFIG_CGROUP_VM)
+SUBSYS(vm)
+#endif
+
 /*
  * The following subsystems are not supported on the default hierarchy.
  */
diff --git a/include/linux/vm_cgroup.h b/include/linux/vm_cgroup.h
new file mode 100644
index 000000000000..b629c9affa4b
--- /dev/null
+++ b/include/linux/vm_cgroup.h
@@ -0,0 +1,18 @@
+#ifndef _LINUX_VM_CGROUP_H
+#define _LINUX_VM_CGROUP_H
+
+#ifdef CONFIG_CGROUP_VM
+/* True when the vm controller's vm_cgrp_subsys.disabled field is set. */
+static inline bool vm_cgroup_disabled(void)
+{
+	/* Any non-zero ->disabled value converts to true. */
+	return vm_cgrp_subsys.disabled;
+}
+#else /* !CONFIG_CGROUP_VM */
+static inline bool vm_cgroup_disabled(void)
+{
+	return true;	/* built without CONFIG_CGROUP_VM: always disabled */
+}
+#endif /* CONFIG_CGROUP_VM */
+
+#endif /* _LINUX_VM_CGROUP_H */
diff --git a/init/Kconfig b/init/Kconfig
index 9d76b99af1b9..4419835bea7c 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1008,6 +1008,10 @@ config MEMCG_KMEM
 	  unusable in real life so DO NOT SELECT IT unless for development
 	  purposes.
 
+config CGROUP_VM
+	bool "Virtual Memory Resource Controller for Control Groups"
+	depends on RESOURCE_COUNTERS
+
 config CGROUP_HUGETLB
 	bool "HugeTLB Resource Controller for Control Groups"
 	depends on RESOURCE_COUNTERS && HUGETLB_PAGE
diff --git a/mm/Makefile b/mm/Makefile
index 4064f3ec145e..914520d2669f 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -52,6 +52,7 @@ obj-$(CONFIG_MIGRATION) += migrate.o
 obj-$(CONFIG_QUICKLIST) += quicklist.o
 obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o
 obj-$(CONFIG_MEMCG) += memcontrol.o page_cgroup.o vmpressure.o
+obj-$(CONFIG_CGROUP_VM) += vm_cgroup.o
 obj-$(CONFIG_CGROUP_HUGETLB) += hugetlb_cgroup.o
 obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o
 obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o
diff --git a/mm/vm_cgroup.c b/mm/vm_cgroup.c
new file mode 100644
index 000000000000..7f5b81482748
--- /dev/null
+++ b/mm/vm_cgroup.c
@@ -0,0 +1,131 @@
+#include <linux/cgroup.h>
+#include <linux/res_counter.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/vm_cgroup.h>
+
+/* Per-cgroup state of the vm controller. */
+struct vm_cgroup {
+	/* Embedded css; see vm_cgroup_from_css() for the reverse mapping. */
+	struct cgroup_subsys_state css;
+
+	/* The counter to account for vm usage. */
+	struct res_counter res;
+};
+
+static struct vm_cgroup *root_vm_cgroup __read_mostly; /* set by css_alloc */
+
+static inline bool vm_cgroup_is_root(struct vm_cgroup *vmcg)
+{
+	return vmcg == root_vm_cgroup;	/* compared by address */
+}
+
+static struct vm_cgroup *vm_cgroup_from_css(struct cgroup_subsys_state *s)
+{
+	return s ? container_of(s, struct vm_cgroup, css) : NULL; /* NULL ok */
+}
+
+/* Allocate a vm_cgroup; its counter is set up with the parent's (if any). */
+static struct cgroup_subsys_state *
+vm_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
+{
+	struct vm_cgroup *new_vmcg = kzalloc(sizeof(*new_vmcg), GFP_KERNEL);
+	struct res_counter *parent_res = NULL;
+
+	if (!new_vmcg)
+		return ERR_PTR(-ENOMEM);
+	if (parent_css)
+		parent_res = &vm_cgroup_from_css(parent_css)->res;
+
+	res_counter_init(&new_vmcg->res, parent_res);
+	if (!parent_css)
+		root_vm_cgroup = new_vmcg;	/* no parent => root group */
+	return &new_vmcg->css;
+}
+
+/* css_free callback: release the vm_cgroup that embeds @css. */
+static void vm_cgroup_css_free(struct cgroup_subsys_state *css)
+{
+	/* Memory came from kzalloc() in vm_cgroup_css_alloc(). */
+	kfree(vm_cgroup_from_css(css));
+}
+
+/* Read the res_counter member (RES_*) selected by the file's ->private. */
+static u64 vm_cgroup_read_u64(struct cgroup_subsys_state *css,
+			      struct cftype *cft)
+{
+	struct res_counter *counter = &vm_cgroup_from_css(css)->res;
+
+	return res_counter_read_u64(counter, cft->private);
+}
+
+/* Write handler for limit_in_bytes; the root group's limit cannot be set. */
+static ssize_t vm_cgroup_write(struct kernfs_open_file *of,
+			       char *buf, size_t nbytes, loff_t off)
+{
+	struct vm_cgroup *vmcg = vm_cgroup_from_css(of_css(of));
+	unsigned long long limit;
+	int err;
+
+	if (vm_cgroup_is_root(vmcg))
+		return -EINVAL;
+
+	err = res_counter_memparse_write_strategy(strstrip(buf), &limit);
+	if (!err)
+		err = res_counter_set_limit(&vmcg->res, limit);
+	if (err)
+		return err;
+	return nbytes;
+}
+
+/* Write handler that resets max_usage_in_bytes or failcnt, per ->private. */
+static ssize_t vm_cgroup_reset(struct kernfs_open_file *of, char *buf,
+			       size_t nbytes, loff_t off)
+{
+	struct vm_cgroup *vmcg = vm_cgroup_from_css(of_css(of));
+
+	switch (of_cft(of)->private) {
+	case RES_MAX_USAGE:
+		res_counter_reset_max(&vmcg->res);
+		break;
+	case RES_FAILCNT:
+		res_counter_reset_failcnt(&vmcg->res);
+		break;
+	default:
+		return -EINVAL;	/* not a resettable counter */
+	}
+	return nbytes;
+}
+
+static struct cftype vm_cgroup_files[] = {
+	{	/* read-only: no ->write handler */
+		.name = "usage_in_bytes",
+		.private = RES_USAGE,
+		.read_u64 = vm_cgroup_read_u64,
+	},
+	{	/* a write of any content resets it */
+		.name = "max_usage_in_bytes",
+		.private = RES_MAX_USAGE,
+		.read_u64 = vm_cgroup_read_u64,
+		.write = vm_cgroup_reset,
+	},
+	{	/* writes are rejected on the root group */
+		.name = "limit_in_bytes",
+		.private = RES_LIMIT,
+		.read_u64 = vm_cgroup_read_u64,
+		.write = vm_cgroup_write,
+	},
+	{	/* a write of any content resets it */
+		.name = "failcnt",
+		.private = RES_FAILCNT,
+		.read_u64 = vm_cgroup_read_u64,
+		.write = vm_cgroup_reset,
+	},
+	{ },	/* empty entry terminates the array */
+};
+
+struct cgroup_subsys vm_cgrp_subsys = {	/* vm controller callbacks and files */
+	.css_alloc = vm_cgroup_css_alloc,
+	.css_free = vm_cgroup_css_free,
+	.base_cftypes = vm_cgroup_files,
+};
-- 
1.7.10.4

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@xxxxxxxxx.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@xxxxxxxxx";> email@xxxxxxxxx </a>




[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux]     [Linux OMAP]     [Linux MIPS]     [ECOS]     [Asterisk Internet PBX]     [Linux API]