The patch titled bugfix for memory controller: add helper function for assigning cgroup to page has been added to the -mm tree. Its filename is bugfix-for-memory-controller-add-helper-function-for-assigning-cgroup-to-page.patch *** Remember to use Documentation/SubmitChecklist when testing your code *** See http://www.zip.com.au/~akpm/linux/patches/stuff/added-to-mm.txt to find out what to do about this ------------------------------------------------------ Subject: bugfix for memory controller: add helper function for assigning cgroup to page From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@xxxxxxxxxxxxxx> This patch adds the following functions: - clear_page_cgroup(page, pc) - page_cgroup_assign_new_page_cgroup(page, pc) Mainly for cleanup. The rule "check page->page_cgroup again after taking lock_page_cgroup()" is now implemented in a straightforward way. The comment in mem_cgroup_uncharge() will be removed by the force-empty patch. Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@xxxxxxxxxxxxxx> Cc: Balbir Singh <balbir@xxxxxxxxxxxxxxxxxx> Cc: Pavel Emelianov <xemul@xxxxxxxxxx> Cc: Paul Menage <menage@xxxxxxxxxx> Cc: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx> Cc: "Eric W. 
Biederman" <ebiederm@xxxxxxxxxxxx> Cc: Nick Piggin <nickpiggin@xxxxxxxxxxxx> Cc: Kirill Korotaev <dev@xxxxx> Cc: Herbert Poetzl <herbert@xxxxxxxxxxxx> Cc: David Rientjes <rientjes@xxxxxxxxxx> Cc: Vaidyanathan Srinivasan <svaidy@xxxxxxxxxxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- mm/memcontrol.c | 105 +++++++++++++++++++++++++++++++++------------- 1 file changed, 76 insertions(+), 29 deletions(-) diff -puN mm/memcontrol.c~bugfix-for-memory-controller-add-helper-function-for-assigning-cgroup-to-page mm/memcontrol.c --- a/mm/memcontrol.c~bugfix-for-memory-controller-add-helper-function-for-assigning-cgroup-to-page +++ a/mm/memcontrol.c @@ -162,6 +162,48 @@ static void __always_inline unlock_page_ bit_spin_unlock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup); } +/* + * Tie new page_cgroup to struct page under lock_page_cgroup() + * This can fail if the page has been tied to a page_cgroup. + * If success, returns 0. + */ +static inline int +page_cgroup_assign_new_page_cgroup(struct page *page, struct page_cgroup *pc) +{ + int ret = 0; + + lock_page_cgroup(page); + if (!page_get_page_cgroup(page)) + page_assign_page_cgroup(page, pc); + else /* A page is tied to other pc. */ + ret = 1; + unlock_page_cgroup(page); + return ret; +} + +/* + * Clear page->page_cgroup member under lock_page_cgroup(). + * If given "pc" value is different from one page->page_cgroup, + * page->cgroup is not cleared. + * Returns a value of page->page_cgroup at lock taken. 
+ * A can can detect failure of clearing by following + * clear_page_cgroup(page, pc) == pc + */ + +static inline struct page_cgroup * +clear_page_cgroup(struct page *page, struct page_cgroup *pc) +{ + struct page_cgroup *ret; + /* lock and clear */ + lock_page_cgroup(page); + ret = page_get_page_cgroup(page); + if (likely(ret == pc)) + page_assign_page_cgroup(page, NULL); + unlock_page_cgroup(page); + return ret; +} + + static void __mem_cgroup_move_lists(struct page_cgroup *pc, bool active) { if (active) @@ -270,7 +312,7 @@ int mem_cgroup_charge(struct page *page, gfp_t gfp_mask) { struct mem_cgroup *mem; - struct page_cgroup *pc, *race_pc; + struct page_cgroup *pc; unsigned long flags; unsigned long nr_retries = MEM_CGROUP_RECLAIM_RETRIES; @@ -293,8 +335,10 @@ retry: unlock_page_cgroup(page); cpu_relax(); goto retry; - } else + } else { + unlock_page_cgroup(page); goto done; + } } unlock_page_cgroup(page); @@ -364,31 +408,26 @@ noreclaim: goto free_pc; } - lock_page_cgroup(page); - /* - * Check if somebody else beat us to allocating the page_cgroup - */ - race_pc = page_get_page_cgroup(page); - if (race_pc) { - kfree(pc); - pc = race_pc; - atomic_inc(&pc->ref_cnt); - res_counter_uncharge(&mem->res, PAGE_SIZE); - css_put(&mem->css); - goto done; - } - atomic_set(&pc->ref_cnt, 1); pc->mem_cgroup = mem; pc->page = page; - page_assign_page_cgroup(page, pc); + if (page_cgroup_assign_new_page_cgroup(page, pc)) { + /* + * an another charge is added to this page already. + * we do take lock_page_cgroup(page) again and read + * page->cgroup, increment refcnt.... just retry is OK. 
+ */ + res_counter_uncharge(&mem->res, PAGE_SIZE); + css_put(&mem->css); + kfree(pc); + goto retry; + } spin_lock_irqsave(&mem->lru_lock, flags); list_add(&pc->lru, &mem->active_list); spin_unlock_irqrestore(&mem->lru_lock, flags); done: - unlock_page_cgroup(page); return 0; free_pc: kfree(pc); @@ -432,17 +471,25 @@ void mem_cgroup_uncharge(struct page_cgr if (atomic_dec_and_test(&pc->ref_cnt)) { page = pc->page; - lock_page_cgroup(page); - mem = pc->mem_cgroup; - css_put(&mem->css); - page_assign_page_cgroup(page, NULL); - unlock_page_cgroup(page); - res_counter_uncharge(&mem->res, PAGE_SIZE); - - spin_lock_irqsave(&mem->lru_lock, flags); - list_del_init(&pc->lru); - spin_unlock_irqrestore(&mem->lru_lock, flags); - kfree(pc); + /* + * get page->cgroup and clear it under lock. + */ + if (clear_page_cgroup(page, pc) == pc) { + mem = pc->mem_cgroup; + css_put(&mem->css); + res_counter_uncharge(&mem->res, PAGE_SIZE); + spin_lock_irqsave(&mem->lru_lock, flags); + list_del_init(&pc->lru); + spin_unlock_irqrestore(&mem->lru_lock, flags); + kfree(pc); + } else { + /* + * Note:This will be removed when force-empty patch is + * applied. just show warning here. 
+ */ + printk(KERN_ERR "Race in mem_cgroup_uncharge() ?"); + dump_stack(); + } } } _ Patches currently in -mm which might be from kamezawa.hiroyu@xxxxxxxxxxxxxx are origin.patch git-sh.patch task-containersv11-shared-container-subsystem-group-arrays-simplify-proc-cgroups.patch task-containersv11-shared-container-subsystem-group-arrays-simplify-proc-cgroups-fix.patch memory-controller-make-charging-gfp-mask-aware-fix.patch bugfix-for-memory-cgroup-controller-charge-refcnt-race-fix.patch bugfix-for-memory-cgroup-controller-fix-error-handling-path-in-mem_charge_cgroup.patch bugfix-for-memory-controller-add-helper-function-for-assigning-cgroup-to-page.patch bugfix-for-memory-cgroup-controller-avoid-pagelru-page-in-mem_cgroup_isolate_pages.patch bugfix-for-memory-cgroup-controller-migration-under-memory-controller-fix.patch cpu-hotplug-avoid-hotadd-when-proper-possible_map-isnt-specified.patch cpu-hotplug-avoid-hotadd-when-proper-possible_map-isnt-specified-checkpatch-fixes.patch add-irq-protection-in-the-percpu-counters-cpu-hotplug-callback-path.patch - To unsubscribe from this list: send the line "unsubscribe mm-commits" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html