[RFC] [PATCH 5/5] memcg: remove PCG_MOVE_LOCK

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



>From feb695347b46885b5c785892271e19e503a43aa4 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@xxxxxxxxxxxxxx>
Date: Thu, 15 Dec 2011 14:00:15 +0900
Subject: [PATCH 5/5] memcg: remove PCG_MOVE_LOCK.

PCG_MOVE_LOCK was introduced to prevent pc->mem_cgroup from being
overwritten by move_account() while updating the states.
lock_page_cgroup() cannot be used here because page status can be
updated from IRQ context.

Considering this again: in most cases this lock is not contended. It is
only held while a task is moving between cgroups (with the account_move
flag set) or while rmdir() is being called. So spending one bit per page
just to avoid this race does not seem worthwhile.

This patch adds a hashed array of rwlocks to protect page status
updates. The hash size is NR_CPUS, and we have the rwlock array
move_account_locks[hash(page)].

  - moving an account needs to take the write lock for the page, with IRQs off.
  - stat updates need to take the read lock.

After this patch, the PCG_MOVE_LOCK bit can go away. And when updating
page stats, callers no longer need to disable IRQs.

In old path
   - disable_irq -> bit_spin_lock per page
In new path
   - read_lock on hashed rwlock.


Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@xxxxxxxxxxxxxx>
---
 include/linux/page_cgroup.h |   20 -------------
 mm/memcontrol.c             |   66 +++++++++++++++++++++++++++++++++++++++----
 2 files changed, 60 insertions(+), 26 deletions(-)

diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index 86967ed..f9441ca 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -6,8 +6,6 @@ enum {
 	PCG_LOCK,  /* Lock for pc->mem_cgroup and following bits. */
 	PCG_USED, /* this object is in use. */
 	PCG_MIGRATION, /* under page migration */
-	/* flags for mem_cgroup and file and I/O status */
-	PCG_MOVE_LOCK, /* For race between move_account v.s. following bits */
 	__NR_PCG_FLAGS,
 };
 
@@ -84,24 +82,6 @@ static inline void unlock_page_cgroup(struct page_cgroup *pc)
 	bit_spin_unlock(PCG_LOCK, &pc->flags);
 }
 
-static inline void move_lock_page_cgroup(struct page_cgroup *pc,
-	unsigned long *flags)
-{
-	/*
-	 * We know updates to pc->flags of page cache's stats are from both of
-	 * usual context or IRQ context. Disable IRQ to avoid deadlock.
-	 */
-	local_irq_save(*flags);
-	bit_spin_lock(PCG_MOVE_LOCK, &pc->flags);
-}
-
-static inline void move_unlock_page_cgroup(struct page_cgroup *pc,
-	unsigned long *flags)
-{
-	bit_spin_unlock(PCG_MOVE_LOCK, &pc->flags);
-	local_irq_restore(*flags);
-}
-
 #else /* CONFIG_CGROUP_MEM_RES_CTLR */
 struct page_cgroup;
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c9b1131..66e03ad 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -307,6 +307,61 @@ struct mem_cgroup {
 #endif
 };
 
+/* locks for account moving between memcg. */
+#define NR_MOVE_ACCOUNT_LOCKS	(NR_CPUS)
+static rwlock_t move_account_locks[NR_MOVE_ACCOUNT_LOCKS];
+#define move_account_hash(page)	((page_to_pfn(page) % NR_MOVE_ACCOUNT_LOCKS))
+
+static rwlock_t *move_account_rwlock(struct page *page)
+{
+	int hnum = page_to_pfn(page) % NR_MOVE_ACCOUNT_LOCKS;
+
+	return &move_account_locks[hnum];
+}
+
+/*
+ * Read move lock is taken at updating statistics.
+ */
+static void mem_cgroup_move_account_rlock(struct page *page)
+{
+	rwlock_t *lock = move_account_rwlock(page);
+
+	read_lock(lock);	
+}
+
+static void mem_cgroup_move_account_runlock(struct page *page)
+{
+	rwlock_t *lock = move_account_rwlock(page);
+
+	read_unlock(lock);
+}
+/*
+ * Write move lock is taken when moving task account information.
+ */
+static void mem_cgroup_move_account_wlock(struct page *page, unsigned long *flags)
+{
+	rwlock_t *lock = move_account_rwlock(page);
+
+	write_lock_irqsave(lock, *flags);
+}
+
+static void mem_cgroup_move_account_wunlock(struct page *page, unsigned long *flags)
+{
+	rwlock_t *lock = move_account_rwlock(page);
+
+	write_unlock_irqrestore(lock, *flags);
+}
+
+static int __init mem_cgroup_rwlock_init(void)
+{
+	int num;
+
+	for (num = 0; num < NR_MOVE_ACCOUNT_LOCKS; num++)
+		rwlock_init(&move_account_locks[num]);
+	return 0;
+}
+late_initcall(mem_cgroup_rwlock_init);
+
 /* Stuffs for move charges at task migration. */
 /*
  * Types of charges to be moved. "move_charge_at_immitgrate" is treated as a
@@ -1846,7 +1901,7 @@ bool __mem_cgroup_begin_update_page_stats(struct page *page, unsigned long *flag
 	if (!memcg  || !PageCgroupUsed(pc))
 		goto out;
 	if (unlikely(mem_cgroup_stealed(memcg)) || PageTransHuge(page)) {
-		move_lock_page_cgroup(pc, flags);
+		mem_cgroup_move_account_rlock(page);
 		need_unlock = true;
 	}
 out:
@@ -1861,7 +1916,7 @@ void __mem_cgroup_end_update_page_stats(struct page *page, bool locked,
 
 	if (unlikely(locked)) {
 		pc = lookup_page_cgroup(page);
-		move_unlock_page_cgroup(pc, flags);
+		mem_cgroup_move_account_runlock(page);
 	}
 	rcu_read_unlock();
 }
@@ -2414,8 +2469,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 
-#define PCGF_NOCOPY_AT_SPLIT ((1 << PCG_LOCK) | (1 << PCG_MOVE_LOCK) |\
-			(1 << PCG_MIGRATION))
+#define PCGF_NOCOPY_AT_SPLIT ((1 << PCG_LOCK) | (1 << PCG_MIGRATION))
 /*
  * Because tail pages are not marked as "used", set it. We're under
  * zone->lru_lock, 'splitting on pmd' and compound_lock.
@@ -2494,7 +2548,7 @@ static int mem_cgroup_move_account(struct page *page,
 	if (!PageCgroupUsed(pc) || pc->mem_cgroup != from)
 		goto unlock;
 
-	move_lock_page_cgroup(pc, &flags);
+	mem_cgroup_move_account_wlock(page, &flags);
 
 	if (page_mapped(page) && !PageAnon(page)) {
 		/* Update mapped_file data for mem_cgroup */
@@ -2518,7 +2572,7 @@ static int mem_cgroup_move_account(struct page *page,
 	 * guaranteed that "to" is never removed. So, we don't check rmdir
 	 * status here.
 	 */
-	move_unlock_page_cgroup(pc, &flags);
+	mem_cgroup_move_account_wunlock(page, &flags);
 	ret = 0;
 unlock:
 	unlock_page_cgroup(pc);
-- 
1.7.4.1


--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@xxxxxxxxx.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@xxxxxxxxx";> email@xxxxxxxxx </a>


[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux]     [Linux OMAP]     [Linux MIPS]     [ECOS]     [Asterisk Internet PBX]     [Linux API]