This untested patch reduces struct page_cgroup to 8 bytes by packing the mem_cgroup pointer and the flag bits into a single unsigned long: the flags live in the low bits, which are always zero in a suitably aligned pointer. After enough testing, we should be ready to integrate page_cgroup as a member of struct page (behind a CONFIG option?). I'll start testing when I can. A rough userspace sketch of the packing idea is appended after the patch.

BTW, this patch does not consider how to track the blkio owner for supporting buffered I/O in the blkio cgroup. I wonder whether it would be enough to add an interface that ties a memcg to a blkio cgroup even when they are not bind-mounted; then the blkio_id could be looked up via page -> memcg -> blkio_id. Another idea is to use page->private (or something similar) to record who the writer is and propagate that information to the buffer_head or bio. The worst idea would be adding a new field to page_cgroup.

==
diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index f9441ca..48be740 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -23,8 +23,7 @@ enum {
  * then the page cgroup for pfn always exists.
  */
 struct page_cgroup {
-	unsigned long flags;
-	struct mem_cgroup *mem_cgroup;
+	unsigned long _flags; /* This flag only uses lower 3bits */
 };
 
 void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat);
@@ -46,19 +45,19 @@ struct page *lookup_cgroup_page(struct page_cgroup *pc);
 
 #define TESTPCGFLAG(uname, lname)			\
 static inline int PageCgroup##uname(struct page_cgroup *pc)	\
-	{ return test_bit(PCG_##lname, &pc->flags); }
+	{ return test_bit(PCG_##lname, &pc->_flags); }
 
 #define SETPCGFLAG(uname, lname)			\
 static inline void SetPageCgroup##uname(struct page_cgroup *pc)\
-	{ set_bit(PCG_##lname, &pc->flags); }
+	{ set_bit(PCG_##lname, &pc->_flags); }
 
 #define CLEARPCGFLAG(uname, lname)			\
 static inline void ClearPageCgroup##uname(struct page_cgroup *pc)	\
-	{ clear_bit(PCG_##lname, &pc->flags); }
+	{ clear_bit(PCG_##lname, &pc->_flags); }
 
 #define TESTCLEARPCGFLAG(uname, lname)			\
 static inline int TestClearPageCgroup##uname(struct page_cgroup *pc)	\
-	{ return test_and_clear_bit(PCG_##lname, &pc->flags); }
+	{ return test_and_clear_bit(PCG_##lname, &pc->_flags); }
 
 TESTPCGFLAG(Used, USED)
 CLEARPCGFLAG(Used, USED)
@@ -68,18 +67,33 @@ SETPCGFLAG(Migration, MIGRATION)
 CLEARPCGFLAG(Migration, MIGRATION)
 TESTPCGFLAG(Migration, MIGRATION)
 
+#define PCG_FLAG_MASK	((1 << (__NR_PCG_FLAGS)) - 1)
+
 static inline void lock_page_cgroup(struct page_cgroup *pc)
 {
 	/*
 	 * Don't take this lock in IRQ context.
 	 * This lock is for pc->mem_cgroup, USED, CACHE, MIGRATION
 	 */
-	bit_spin_lock(PCG_LOCK, &pc->flags);
+	bit_spin_lock(PCG_LOCK, &pc->_flags);
 }
 
 static inline void unlock_page_cgroup(struct page_cgroup *pc)
 {
-	bit_spin_unlock(PCG_LOCK, &pc->flags);
+	bit_spin_unlock(PCG_LOCK, &pc->_flags);
+}
+
+static inline struct mem_cgroup *pc_to_memcg(struct page_cgroup *pc)
+{
+	return (struct mem_cgroup *)
+		((unsigned long)pc->_flags & ~PCG_FLAG_MASK);
+}
+
+static inline void
+pc_set_memcg(struct page_cgroup *pc, struct mem_cgroup *memcg)
+{
+	unsigned long val = pc->_flags & PCG_FLAG_MASK;
+	pc->_flags = (unsigned long)memcg | val;
 }
 
 #else /* CONFIG_CGROUP_MEM_RES_CTLR */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 66e03ad..8750e5a 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1103,7 +1103,7 @@ struct lruvec *mem_cgroup_lru_add_list(struct zone *zone, struct page *page,
 		return &zone->lruvec;
 
 	pc = lookup_page_cgroup(page);
-	memcg = pc->mem_cgroup;
+	memcg = pc_to_memcg(pc);
 	VM_BUG_ON(!memcg);
 	mz = page_cgroup_zoneinfo(memcg, page);
 	/* compound_order() is stabilized through lru_lock */
@@ -1131,7 +1131,7 @@ void mem_cgroup_lru_del_list(struct page *page, enum lru_list lru)
 		return;
 
 	pc = lookup_page_cgroup(page);
-	memcg = pc->mem_cgroup;
+	memcg = pc_to_memcg(pc);
 	VM_BUG_ON(!memcg);
 	mz = page_cgroup_zoneinfo(memcg, page);
 	/* huge page split is done under lru_lock. so, we have no races. */
@@ -1268,7 +1268,7 @@ mem_cgroup_get_reclaim_stat_from_page(struct page *page)
 		return NULL;
 	/* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
 	smp_rmb();
-	mz = page_cgroup_zoneinfo(pc->mem_cgroup, page);
+	mz = page_cgroup_zoneinfo(pc_to_memcg(pc), page);
 	return &mz->reclaim_stat;
 }
 
@@ -1897,7 +1897,7 @@ bool __mem_cgroup_begin_update_page_stats(struct page *page, unsigned long *flag
 	bool need_unlock = false;
 
 	rcu_read_lock();
-	memcg = pc->mem_cgroup;
+	memcg = pc_to_memcg(pc);
 	if (!memcg || !PageCgroupUsed(pc))
 		goto out;
 	if (unlikely(mem_cgroup_stealed(memcg)) || PageTransHuge(page)) {
@@ -1926,7 +1926,7 @@ void __mem_cgroup_update_page_stat(struct page *page,
 				 enum mem_cgroup_page_stat_item idx, int val)
 {
 	struct page_cgroup *pc = lookup_page_cgroup(page);
-	struct mem_cgroup *memcg = pc->mem_cgroup;
+	struct mem_cgroup *memcg = pc_to_memcg(pc);
 
 	if (!memcg || !PageCgroupUsed(pc))
 		return;
@@ -2400,7 +2400,7 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
 	pc = lookup_page_cgroup(page);
 	lock_page_cgroup(pc);
 	if (PageCgroupUsed(pc)) {
-		memcg = pc->mem_cgroup;
+		memcg = pc_to_memcg(pc);
 		if (memcg && !css_tryget(&memcg->css))
 			memcg = NULL;
 	} else if (PageSwapCache(page)) {
@@ -2434,7 +2434,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
 	 * we don't need page_cgroup_lock about tail pages, becase they are not
 	 * accessed by any other context at this point.
 	 */
-	pc->mem_cgroup = memcg;
+	pc_set_memcg(pc, memcg);
 	/*
 	 * We access a page_cgroup asynchronously without lock_page_cgroup().
 	 * Especially when a page_cgroup is taken from a page, pc->mem_cgroup
@@ -2469,7 +2469,6 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 
-#define PCGF_NOCOPY_AT_SPLIT ((1 << PCG_LOCK) | (1 << PCG_MIGRATION))
 /*
  * Because tail pages are not marked as "used", set it. We're under
  * zone->lru_lock, 'splitting on pmd' and compound_lock.
@@ -2481,23 +2480,26 @@ void mem_cgroup_split_huge_fixup(struct page *head)
 	struct page_cgroup *head_pc = lookup_page_cgroup(head);
 	struct page_cgroup *pc;
 	struct mem_cgroup_per_zone *mz;
+	struct mem_cgroup *head_memcg;
 	enum lru_list lru;
 	int i;
 
 	if (mem_cgroup_disabled())
 		return;
+	head_memcg = pc_to_memcg(head_pc);
 	for (i = 1; i < HPAGE_PMD_NR; i++) {
 		pc = head_pc + i;
-		pc->mem_cgroup = head_pc->mem_cgroup;
+		pc_set_memcg(pc, head_memcg);
 		smp_wmb();/* see __commit_charge() */
-		pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT;
+		/* this page is never be under page migration */
+		SetPageCgroupUsed(pc);
 	}
 	/*
 	 * Tail pages will be added to LRU.
 	 * We hold lru_lock,then,reduce counter directly.
 	 */
 	lru = page_lru(head);
-	mz = page_cgroup_zoneinfo(head_pc->mem_cgroup, head);
+	mz = page_cgroup_zoneinfo(head_memcg, head);
 	MEM_CGROUP_ZSTAT(mz, lru) -= HPAGE_PMD_NR - 1;
 }
 #endif
@@ -2545,7 +2547,7 @@ static int mem_cgroup_move_account(struct page *page,
 	lock_page_cgroup(pc);
 
 	ret = -EINVAL;
-	if (!PageCgroupUsed(pc) || pc->mem_cgroup != from)
+	if (!PageCgroupUsed(pc) || pc_to_memcg(pc) != from)
 		goto unlock;
 
 	mem_cgroup_move_account_wlock(page, &flags);
@@ -2563,7 +2565,7 @@ static int mem_cgroup_move_account(struct page *page,
 		__mem_cgroup_cancel_charge(from, nr_pages);
 
 	/* caller should have done css_get */
-	pc->mem_cgroup = to;
+	pc_set_memcg(pc, to);
 	mem_cgroup_charge_statistics(to, !PageAnon(page), nr_pages);
 	/*
 	 * We charges against "to" which may not have any tasks. Then, "to"
@@ -2928,7 +2930,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 
 	lock_page_cgroup(pc);
 
-	memcg = pc->mem_cgroup;
+	memcg = pc_to_memcg(pc);
 
 	if (!PageCgroupUsed(pc))
 		goto unlock_out;
@@ -3109,7 +3111,7 @@ void mem_cgroup_reset_owner(struct page *newpage)
 
 	pc = lookup_page_cgroup(newpage);
 	VM_BUG_ON(PageCgroupUsed(pc));
-	pc->mem_cgroup = root_mem_cgroup;
+	pc_set_memcg(pc, root_mem_cgroup);
 }
 
 /**
@@ -3191,7 +3193,7 @@ int mem_cgroup_prepare_migration(struct page *page,
 	pc = lookup_page_cgroup(page);
 	lock_page_cgroup(pc);
 	if (PageCgroupUsed(pc)) {
-		memcg = pc->mem_cgroup;
+		memcg = pc_to_memcg(pc);
 		css_get(&memcg->css);
 		/*
 		 * At migrating an anonymous page, its mapcount goes down
@@ -3329,7 +3331,7 @@ void mem_cgroup_replace_page_cache(struct page *oldpage,
 	pc = lookup_page_cgroup(oldpage);
 	/* fix accounting on old pages */
 	lock_page_cgroup(pc);
-	memcg = pc->mem_cgroup;
+	memcg = pc_to_memcg(pc);
 	mem_cgroup_charge_statistics(memcg, !PageAnon(oldpage), -1);
 	ClearPageCgroupUsed(pc);
 	unlock_page_cgroup(pc);
@@ -3376,14 +3378,15 @@ void mem_cgroup_print_bad_page(struct page *page)
 	if (pc) {
 		int ret = -1;
 		char *path;
+		struct mem_cgroup *memcg = pc_to_memcg(pc);
 
 		printk(KERN_ALERT "pc:%p pc->flags:%lx pc->mem_cgroup:%p",
-		       pc, pc->flags, pc->mem_cgroup);
+		       pc, pc->_flags, memcg);
 
 		path = kmalloc(PATH_MAX, GFP_KERNEL);
 		if (path) {
 			rcu_read_lock();
-			ret = cgroup_path(pc->mem_cgroup->css.cgroup,
+			ret = cgroup_path(memcg->css.cgroup,
 					  path, PATH_MAX);
 			rcu_read_unlock();
 		}
@@ -5247,7 +5250,7 @@ static int is_target_pte_for_mc(struct vm_area_struct *vma,
 		 * mem_cgroup_move_account() checks the pc is valid or not under
 		 * the lock.
 		 */
-		if (PageCgroupUsed(pc) && pc->mem_cgroup == mc.from) {
+		if (PageCgroupUsed(pc) && pc_to_memcg(pc) == mc.from) {
 			ret = MC_TARGET_PAGE;
 			if (target)
 				target->page = page;
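
Appendix (not part of the patch): a rough userspace sketch of the pointer/flags packing that pc_to_memcg()/pc_set_memcg() rely on. The fake_* names and the FAKE_PCG_USED bit are made up for the demo; the only assumption it shares with the patch is that the memcg object is allocated with at least 8-byte alignment, so the low 3 bits of its address are free to carry flag bits.

/* pack_demo.c -- toy model of the 8-byte page_cgroup packing (userspace only) */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

#define NR_FLAG_BITS	3			/* patch comment: flags use the lower 3 bits */
#define FLAG_MASK	((1UL << NR_FLAG_BITS) - 1)
#define FAKE_PCG_USED	(1UL << 1)		/* made-up bit number, demo only */

struct fake_memcg { long dummy; };		/* stand-in for struct mem_cgroup */
struct fake_page_cgroup { unsigned long _flags; };	/* 8 bytes on 64-bit */

static struct fake_memcg *pc_to_memcg(struct fake_page_cgroup *pc)
{
	/* mask off the flag bits; what remains is the pointer */
	return (struct fake_memcg *)(pc->_flags & ~FLAG_MASK);
}

static void pc_set_memcg(struct fake_page_cgroup *pc, struct fake_memcg *memcg)
{
	/* keep the existing flag bits, replace only the pointer part */
	pc->_flags = (unsigned long)memcg | (pc->_flags & FLAG_MASK);
}

int main(void)
{
	struct fake_page_cgroup pc = { ._flags = 0 };
	struct fake_memcg *memcg = malloc(sizeof(*memcg));

	/* malloc() returns memory aligned enough that the low bits are clear */
	assert(((unsigned long)memcg & FLAG_MASK) == 0);

	pc._flags |= FAKE_PCG_USED;		/* set a flag first ... */
	pc_set_memcg(&pc, memcg);		/* ... then store the pointer */

	assert(pc_to_memcg(&pc) == memcg);	/* the pointer survives */
	assert(pc._flags & FAKE_PCG_USED);	/* the flag bit survives */

	printf("memcg=%p flags=%#lx\n",
	       (void *)pc_to_memcg(&pc), pc._flags & FLAG_MASK);
	free(memcg);
	return 0;
}

The real struct mem_cgroup is allocated from the slab/vmalloc allocators, so it is at least word-aligned; the scheme only works as long as __NR_PCG_FLAGS stays within those guaranteed-zero low bits.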