Re: Control page reclaim granularity

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Zheng Liu <gnehzuil.liu@xxxxxxxxx> writes:
> Hi Greg,
>
> Sorry, I forgot to say that I am not subscribed to the linux-mm and
> linux-kernel mailing lists, so please Cc me.
>
> I am glad to receive your reply and I am very interested in your
> approach.  Actually, I am not very familiar with CGroup.  So would you
> please send your patch to me if you can?  Thank you all the same.
>
> Regards,
> Zheng

Sorry for the delay, I had trouble finding my old prototype patch.  The
patch below is based on v2.6.34.  The patch is just an idea, not a
complete solution.

>From b1b127e0e1443446d51353b0d7a776bddc046009 Mon Sep 17 00:00:00 2001
From: Greg Thelen <gthelen@xxxxxxxxxx>
Date: Sat, 5 Jun 2010 17:26:06 -0700
Subject: [PATCH] memcg: prototype of dentry/cgroup binding.

JUST A PROTOTYPE: DO NOT SUBMIT

This creates a /dev/cgroup/memory/X/memory.dir_roots file which one can
use to register directory file descriptors.  The idea is that future
charges to registered directories, including child inodes, will be
billed to memcg X rather than whatever memcg the faulting process runs
within.
---
 fs/dcache.c                |    4 +++
 include/linux/dcache.h     |    1 +
 include/linux/memcontrol.h |    2 +-
 mm/filemap.c               |    3 ++
 mm/memcontrol.c            |   64 ++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 73 insertions(+), 1 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index f1358e5..dda48d7 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -70,6 +70,7 @@ struct dentry_stat_t dentry_stat = {
 static void __d_free(struct dentry *dentry)
 {
 	WARN_ON(!list_empty(&dentry->d_alias));
+	BUG_ON(dentry->d_mem);
 	if (dname_external(dentry))
 		kfree(dentry->d_name.name);
 	kmem_cache_free(dentry_cache, dentry); 
@@ -172,6 +173,7 @@ static struct dentry *d_kill(struct dentry *dentry)
 	struct dentry *parent;
 
 	list_del(&dentry->d_u.d_child);
+	mem_cgroup_disassociate_from_dentry(dentry);
 	dentry_stat.nr_dentry--;	/* For d_free, below */
 	/*drops the locks, at that point nobody can reach this dentry */
 	dentry_iput(dentry);
@@ -953,6 +955,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
 	dentry->d_inode = NULL;
 	dentry->d_parent = NULL;
 	dentry->d_sb = NULL;
+	dentry->d_mem = NULL;
 	dentry->d_op = NULL;
 	dentry->d_fsdata = NULL;
 	dentry->d_mounted = 0;
@@ -964,6 +967,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
 	if (parent) {
 		dentry->d_parent = dget(parent);
 		dentry->d_sb = parent->d_sb;
+		dentry->d_mem = parent->d_mem;
 	} else {
 		INIT_LIST_HEAD(&dentry->d_u.d_child);
 	}
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index eebb617..523d58b 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -114,6 +114,7 @@ struct dentry {
 	unsigned long d_time;		/* used by d_revalidate */
 	const struct dentry_operations *d_op;
 	struct super_block *d_sb;	/* The root of the dentry tree */
+	struct mem_cgroup *d_mem;	/* Optional memcg */
 	void *d_fsdata;			/* fs-specific data */
 
 	unsigned char d_iname[DNAME_INLINE_LEN_MIN];	/* small names */
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 44301c6..a8b54f9 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -71,6 +71,7 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 					struct mem_cgroup *mem_cont,
 					int active, int file);
 extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask);
+void mem_cgroup_disassociate_from_dentry(struct dentry *dentry);
 int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem);
 
 extern struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page);
@@ -309,4 +310,3 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
 #endif /* CONFIG_CGROUP_MEM_CONT */
 
 #endif /* _LINUX_MEMCONTROL_H */
-
diff --git a/mm/filemap.c b/mm/filemap.c
index 140ebda..a9a525b 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -400,8 +400,11 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
 
 	VM_BUG_ON(!PageLocked(page));
 
+	VM_BUG_ON(page->mapping != NULL);
+	page->mapping = mapping; /* XXX: hack? */
 	error = mem_cgroup_cache_charge(page, current->mm,
 					gfp_mask & GFP_RECLAIM_MASK);
+	page->mapping = NULL; /* XXX: hack? */
 	if (error)
 		goto out;
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 8a79a6f..de9f150 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -793,6 +793,23 @@ void mem_cgroup_move_lists(struct page *page,
 	mem_cgroup_add_lru_list(page, to);
 }
 
+static void mem_cgroup_associate_dentry(struct mem_cgroup *mem,
+					struct dentry *dentry)
+{
+	css_get(&mem->css);
+	BUG_ON(dentry->d_mem);
+	dentry->d_mem = mem;
+}
+
+void mem_cgroup_disassociate_from_dentry(struct dentry *dentry)
+{
+	if (!dentry->d_mem)
+		return;
+
+	css_put(&dentry->d_mem->css);
+	dentry->d_mem = NULL;
+}
+
 int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem)
 {
 	int ret;
@@ -1914,6 +1931,29 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
 		return 0;
 	prefetchw(pc);
 
+	/*
+	 * If the page belongs to an inode and a related dentry indicates a cgroup,
+	 * then charge that cgroup.  Otherwise fall back on the mm's cgroup.
+	 *
+	 * TODO(gthelen): this needs more thought.
+	 */
+	if ((memcg == NULL) && !PageAnon(page)) {
+		struct address_space *as;
+		struct inode *inode;
+		struct dentry *dentry;
+
+		/* what kind of locking is needed to walk this?  dcache_lock (gulp)? */
+		as = (struct address_space *)page_rmapping(page);
+		if (as != NULL) {
+			inode = as->host;
+			BUG_ON(inode == NULL);
+			list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
+				memcg = dentry->d_mem;
+				break;
+			}
+		}
+	}
+
 	mem = memcg;
 	ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, true);
 	if (ret || !mem)
@@ -3539,6 +3579,26 @@ unlock:
 	return ret;
 }
 
+static int mem_cgroup_dir_roots_write(struct cgroup *cgrp, struct cftype *cft,
+				      u64 dir_fd)
+{
+	struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp);
+	struct file *dir;
+	int status = 0;
+
+	dir = fget(dir_fd);
+	if (!dir)
+		return -EINVAL;
+
+	if (dir->f_dentry->d_mem)
+		status = -EINVAL;
+	else
+		mem_cgroup_associate_dentry(mem, dir->f_dentry);
+
+	fput(dir);
+	return status;
+}
+
 static struct cftype mem_cgroup_files[] = {
 	{
 		.name = "usage_in_bytes",
@@ -3594,6 +3654,10 @@ static struct cftype mem_cgroup_files[] = {
 		.read_u64 = mem_cgroup_move_charge_read,
 		.write_u64 = mem_cgroup_move_charge_write,
 	},
+	{
+		.name = "dir_roots",
+		.write_u64  = mem_cgroup_dir_roots_write,
+	},
 };
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
-- 
1.7.7.3

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@xxxxxxxxx.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@xxxxxxxxx";> email@xxxxxxxxx </a>


[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Announce]     [Bugtraq]     [Linux]     [Linux OMAP]     [Linux MIPS]     [ECOS]     [Asterisk Internet PBX]     [Linux API]