When a task that has charged surplus hugepages is moved to another
memory cgroup, move the charge along with it so that the per-cgroup
statistics stay correct.

Signed-off-by: TSUKADA Koutaro <tsukada@xxxxxxxxxxxx>
---
 mm/memcontrol.c | 99 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 99 insertions(+)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index a8f1ff8..63f0922 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4698,12 +4698,110 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
 	return 0;
 }
 
+#ifdef CONFIG_HUGETLB_PAGE
+static enum mc_target_type get_mctgt_type_hugetlb(struct vm_area_struct *vma,
+		unsigned long addr, pte_t *pte, union mc_target *target)
+{
+	struct page *page = NULL;
+	pte_t entry;
+	enum mc_target_type ret = MC_TARGET_NONE;
+
+	if (!(mc.flags & MOVE_ANON))
+		return ret;
+
+	entry = huge_ptep_get(pte);
+	if (!pte_present(entry))
+		return ret;
+
+	page = pte_page(entry);
+	VM_BUG_ON_PAGE(!page || !PageHead(page), page);
+	if (likely(!PageSurplusCharge(page)))
+		return ret;
+	if (page->mem_cgroup == mc.from) {
+		ret = MC_TARGET_PAGE;
+		if (target) {
+			get_page(page);
+			target->page = page;
+		}
+	}
+
+	return ret;
+}
+
+static int hugetlb_count_precharge_pte_range(pte_t *pte, unsigned long hmask,
+					unsigned long addr, unsigned long end,
+					struct mm_walk *walk)
+{
+	struct vm_area_struct *vma = walk->vma;
+	struct mm_struct *mm = walk->mm;
+	spinlock_t *ptl;
+	union mc_target target;
+
+	ptl = huge_pte_lock(hstate_vma(vma), mm, pte);
+	if (get_mctgt_type_hugetlb(vma, addr, pte, &target) == MC_TARGET_PAGE) {
+		mc.precharge += (1 << compound_order(target.page));
+		put_page(target.page);
+	}
+	spin_unlock(ptl);
+
+	return 0;
+}
+
+static int hugetlb_move_charge_pte_range(pte_t *pte, unsigned long hmask,
+					unsigned long addr, unsigned long end,
+					struct mm_walk *walk)
+{
+	struct vm_area_struct *vma = walk->vma;
+	struct mm_struct *mm = walk->mm;
+	spinlock_t *ptl;
+	enum mc_target_type target_type;
+	union mc_target target;
+	struct page *page;
+	unsigned long nr_pages;
+
+	ptl = huge_pte_lock(hstate_vma(vma), mm, pte);
+	target_type = get_mctgt_type_hugetlb(vma, addr, pte, &target);
+	if (target_type == MC_TARGET_PAGE) {
+		page = target.page;
+		nr_pages = (1 << compound_order(page));
+		if (mc.precharge < nr_pages) {
+			put_page(page);
+			goto unlock;
+		}
+		if (!mem_cgroup_move_account(page, true, mc.from, mc.to)) {
+			mc.precharge -= nr_pages;
+			mc.moved_charge += nr_pages;
+		}
+		put_page(page);
+	}
+unlock:
+	spin_unlock(ptl);
+
+	return 0;
+}
+#else
+static int hugetlb_count_precharge_pte_range(pte_t *pte, unsigned long hmask,
+					unsigned long addr, unsigned long end,
+					struct mm_walk *walk)
+{
+	return 0;
+}
+
+static int hugetlb_move_charge_pte_range(pte_t *pte, unsigned long hmask,
+					unsigned long addr, unsigned long end,
+					struct mm_walk *walk)
+{
+	return 0;
+}
+#endif
+
 static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm)
 {
 	unsigned long precharge;
 	struct mm_walk mem_cgroup_count_precharge_walk = {
 		.pmd_entry = mem_cgroup_count_precharge_pte_range,
+		.hugetlb_entry = hugetlb_count_precharge_pte_range,
 		.mm = mm,
 	};
 
 	down_read(&mm->mmap_sem);
@@ -4981,6 +5079,7 @@ static void mem_cgroup_move_charge(void)
 {
 	struct mm_walk mem_cgroup_move_charge_walk = {
 		.pmd_entry = mem_cgroup_move_charge_pte_range,
+		.hugetlb_entry = hugetlb_move_charge_pte_range,
 		.mm = mc.mm,
 	};
 
-- 
Tsukada
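
P.S. For anyone who wants to exercise this path, here is a minimal,
untested userspace sketch of the scenario (not part of the patch). It
assumes 2MiB hugepages, a cgroup v1 memory controller mounted at
/sys/fs/cgroup/memory with "src" and "dst" groups already created,
memory.move_charge_at_immigrate set to 1 on "dst" (the new walk bails
out unless MOVE_ANON is set), and nr_hugepages=0 with
nr_overcommit_hugepages>0 so the mapping is backed by surplus pages,
with the earlier charging patches of this series applied:

#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

#define HPAGE_SIZE	(2UL * 1024 * 1024)	/* assumes 2MiB hugepages */

/* Write our pid to the given cgroup.procs file (paths are assumptions). */
static int move_self(const char *procs)
{
	char buf[32];
	int n, ret, fd = open(procs, O_WRONLY);

	if (fd < 0)
		return -1;
	n = snprintf(buf, sizeof(buf), "%d", getpid());
	ret = (write(fd, buf, n) == n) ? 0 : -1;
	close(fd);
	return ret;
}

int main(void)
{
	char *p;

	/* Start in "src"; the surplus hugepage gets charged to it. */
	if (move_self("/sys/fs/cgroup/memory/src/cgroup.procs"))
		return 1;

	p = mmap(NULL, HPAGE_SIZE, PROT_READ | PROT_WRITE,
		 MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	p[0] = 1;	/* fault the page in so the charge actually happens */

	/*
	 * Moving to "dst" should now migrate the hugepage charge via the
	 * .hugetlb_entry walk added by this patch.
	 */
	if (move_self("/sys/fs/cgroup/memory/dst/cgroup.procs"))
		return 1;

	pause();	/* inspect the cgroups while the mapping is live */
	munmap(p, HPAGE_SIZE);
	return 0;
}

Comparing memory.usage_in_bytes of "src" and "dst" before and after the
second move should show the 2MiB charge migrating together with the
task.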