On Wed 25-01-12 14:41:00, KAMEZAWA Hiroyuki wrote: [...] > Subject: [PATCH v5] memcg: remove PCG_CACHE > > We record 'the page is cache' by PCG_CACHE bit to page_cgroup. > Here, "CACHE" means anonymous user pages (and SwapCache). This > doesn't include shmem. > > Consdering callers, at charge/uncharge, the caller should know > what the page is and we don't need to record it by using 1bit > per page. > > This patch removes PCG_CACHE bit and make callers of > mem_cgroup_charge_statistics() to specify what the page is. > > About page migration: > Mapping of the used page is not touched during migration (see > page_remove_rmap) so we can rely on it and push the correct charge type > down to __mem_cgroup_uncharge_common from end_migration for unused page. > The force flag was misleading was abused for skipping the needless > page_mapped() / PageCgroupMigration() check, as we know the unused page > is no longer mapped and cleared the migration flag just a few lines > up. But doing the checks is no biggie and it's not worth adding another > flag just to skip them. > > Changelog since v4 > - fixed a bug at page migration by Michal Hokko. Would be more fair: - fixed a bug at page migration by Johannes Weiner > Changelog since v3 > - renamed a variable 'rss' to 'anon' > > Changelog since v2 > - removed 'not_rss', added 'anon' > - changed a meaning of arguments to mem_cgroup_charge_statisitcs() > - removed a patch to mem_cgroup_uncharge_cache > - simplified comment. > > Changelog since RFC. > - rebased onto memcg-devel > - rename 'file' to 'not_rss' > - some cleanup and added comment. > > Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@xxxxxxxxxxxxxx> Anyway it looks good now (unless I missed something ;)). Acked-by: Michal Hocko <mhocko@xxxxxxx> > --- > include/linux/page_cgroup.h | 8 +----- > mm/memcontrol.c | 57 ++++++++++++++++++++++++------------------- > 2 files changed, 33 insertions(+), 32 deletions(-) > > diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h > index a2d1177..1060292 100644 > --- a/include/linux/page_cgroup.h > +++ b/include/linux/page_cgroup.h > @@ -4,7 +4,6 @@ > enum { > /* flags for mem_cgroup */ > PCG_LOCK, /* Lock for pc->mem_cgroup and following bits. */ > - PCG_CACHE, /* charged as cache */ > PCG_USED, /* this object is in use. */ > PCG_MIGRATION, /* under page migration */ > /* flags for mem_cgroup and file and I/O status */ > @@ -64,11 +63,6 @@ static inline void ClearPageCgroup##uname(struct page_cgroup *pc) \ > static inline int TestClearPageCgroup##uname(struct page_cgroup *pc) \ > { return test_and_clear_bit(PCG_##lname, &pc->flags); } > > -/* Cache flag is set only once (at allocation) */ > -TESTPCGFLAG(Cache, CACHE) > -CLEARPCGFLAG(Cache, CACHE) > -SETPCGFLAG(Cache, CACHE) > - > TESTPCGFLAG(Used, USED) > CLEARPCGFLAG(Used, USED) > SETPCGFLAG(Used, USED) > @@ -85,7 +79,7 @@ static inline void lock_page_cgroup(struct page_cgroup *pc) > { > /* > * Don't take this lock in IRQ context. > - * This lock is for pc->mem_cgroup, USED, CACHE, MIGRATION > + * This lock is for pc->mem_cgroup, USED, MIGRATION > */ > bit_spin_lock(PCG_LOCK, &pc->flags); > } > diff --git a/mm/memcontrol.c b/mm/memcontrol.c > index 1c56c5f..45dab06 100644 > --- a/mm/memcontrol.c > +++ b/mm/memcontrol.c > @@ -670,15 +670,19 @@ static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg, > } > > static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg, > - bool file, int nr_pages) > + bool anon, int nr_pages) > { > preempt_disable(); > > - if (file) > - __this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_CACHE], > + /* > + * Here, RSS means 'mapped anon' and anon's SwapCache. Shmem/tmpfs is > + * counted as CACHE even if it's on ANON LRU. > + */ > + if (anon) > + __this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_RSS], > nr_pages); > else > - __this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_RSS], > + __this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_CACHE], > nr_pages); > > /* pagein of a big page is an event. So, ignore page size */ > @@ -2405,6 +2409,8 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg, > struct page_cgroup *pc, > enum charge_type ctype) > { > + bool anon; > + > lock_page_cgroup(pc); > if (unlikely(PageCgroupUsed(pc))) { > unlock_page_cgroup(pc); > @@ -2424,21 +2430,14 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg, > * See mem_cgroup_add_lru_list(), etc. > */ > smp_wmb(); > - switch (ctype) { > - case MEM_CGROUP_CHARGE_TYPE_CACHE: > - case MEM_CGROUP_CHARGE_TYPE_SHMEM: > - SetPageCgroupCache(pc); > - SetPageCgroupUsed(pc); > - break; > - case MEM_CGROUP_CHARGE_TYPE_MAPPED: > - ClearPageCgroupCache(pc); > - SetPageCgroupUsed(pc); > - break; > - default: > - break; > - } > > - mem_cgroup_charge_statistics(memcg, PageCgroupCache(pc), nr_pages); > + SetPageCgroupUsed(pc); > + if (ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED) > + anon = true; > + else > + anon = false; > + > + mem_cgroup_charge_statistics(memcg, anon, nr_pages); > unlock_page_cgroup(pc); > WARN_ON_ONCE(PageLRU(page)); > /* > @@ -2503,6 +2502,7 @@ static int mem_cgroup_move_account(struct page *page, > { > unsigned long flags; > int ret; > + bool anon = PageAnon(page); > > VM_BUG_ON(from == to); > VM_BUG_ON(PageLRU(page)); > @@ -2531,14 +2531,14 @@ static int mem_cgroup_move_account(struct page *page, > __this_cpu_inc(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]); > preempt_enable(); > } > - mem_cgroup_charge_statistics(from, PageCgroupCache(pc), -nr_pages); > + mem_cgroup_charge_statistics(from, anon, -nr_pages); > if (uncharge) > /* This is not "cancel", but cancel_charge does all we need. */ > __mem_cgroup_cancel_charge(from, nr_pages); > > /* caller should have done css_get */ > pc->mem_cgroup = to; > - mem_cgroup_charge_statistics(to, PageCgroupCache(pc), nr_pages); > + mem_cgroup_charge_statistics(to, anon, nr_pages); > /* > * We charges against "to" which may not have any tasks. Then, "to" > * can be under rmdir(). But in current implementation, caller of > @@ -2884,6 +2884,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) > struct mem_cgroup *memcg = NULL; > unsigned int nr_pages = 1; > struct page_cgroup *pc; > + bool anon; > > if (mem_cgroup_disabled()) > return NULL; > @@ -2915,6 +2916,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) > /* See mem_cgroup_prepare_migration() */ > if (page_mapped(page) || PageCgroupMigration(pc)) > goto unlock_out; > + anon = true; > break; > case MEM_CGROUP_CHARGE_TYPE_SWAPOUT: > if (!PageAnon(page)) { /* Shared memory */ > @@ -2922,12 +2924,14 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) > goto unlock_out; > } else if (page_mapped(page)) /* Anon */ > goto unlock_out; > + anon = true; > break; > default: > + anon = false; > break; > } > > - mem_cgroup_charge_statistics(memcg, PageCgroupCache(pc), -nr_pages); > + mem_cgroup_charge_statistics(memcg, anon, -nr_pages); > > ClearPageCgroupUsed(pc); > /* > @@ -3251,6 +3255,7 @@ void mem_cgroup_end_migration(struct mem_cgroup *memcg, > { > struct page *used, *unused; > struct page_cgroup *pc; > + bool anon; > > if (!memcg) > return; > @@ -3272,8 +3277,10 @@ void mem_cgroup_end_migration(struct mem_cgroup *memcg, > lock_page_cgroup(pc); > ClearPageCgroupMigration(pc); > unlock_page_cgroup(pc); > - > - __mem_cgroup_uncharge_common(unused, MEM_CGROUP_CHARGE_TYPE_FORCE); > + anon = PageAnon(used); > + __mem_cgroup_uncharge_common(unused, > + anon ? MEM_CGROUP_CHARGE_TYPE_MAPPED > + : MEM_CGROUP_CHARGE_TYPE_CACHE); > > /* > * If a page is a file cache, radix-tree replacement is very atomic > @@ -3283,7 +3290,7 @@ void mem_cgroup_end_migration(struct mem_cgroup *memcg, > * and USED bit check in mem_cgroup_uncharge_page() will do enough > * check. (see prepare_charge() also) > */ > - if (PageAnon(used)) > + if (anon) > mem_cgroup_uncharge_page(used); > /* > * At migration, we may charge account against cgroup which has no > @@ -3313,7 +3320,7 @@ void mem_cgroup_replace_page_cache(struct page *oldpage, > /* fix accounting on old pages */ > lock_page_cgroup(pc); > memcg = pc->mem_cgroup; > - mem_cgroup_charge_statistics(memcg, PageCgroupCache(pc), -1); > + mem_cgroup_charge_statistics(memcg, false, -1); > ClearPageCgroupUsed(pc); > unlock_page_cgroup(pc); > > -- > 1.7.4.1 > > > > -- > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ -- Michal Hocko SUSE Labs SUSE LINUX s.r.o. Lihovarska 1060/12 190 00 Praha 9 Czech Republic -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxx. For more info on Linux MM, see: http://www.linux-mm.org/ . Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/ Don't email: <a href=mailto:"dont@xxxxxxxxx"> email@xxxxxxxxx </a>