The patch titled
     Subject: mm: fix huge zero page accounting in smaps report
has been added to the -mm tree.  Its filename is
     mm-fix-huge-zero-page-accounting-in-smaps-report.patch

This patch should soon appear at
    http://ozlabs.org/~akpm/mmots/broken-out/mm-fix-huge-zero-page-accounting-in-smaps-report.patch
and later at
    http://ozlabs.org/~akpm/mmotm/broken-out/mm-fix-huge-zero-page-accounting-in-smaps-report.patch

Before you just go and hit "reply", please:
   a) Consider who else should be cc'ed
   b) Prefer to cc a suitable mailing list as well
   c) Ideally: find the original patch on the mailing list and do a
      reply-to-all to that, adding suitable additional cc's

*** Remember to use Documentation/SubmitChecklist when testing your code ***

The -mm tree is included into linux-next and is updated
there every 3-4 working days

------------------------------------------------------
From: "Kirill A. Shutemov" <kirill.shutemov@xxxxxxxxxxxxxxx>
Subject: mm: fix huge zero page accounting in smaps report

Like the small zero page, the huge zero page should not be accounted in
the smaps report as a normal page.

For small pages we rely on vm_normal_page() to filter out the zero page,
but vm_normal_page() is not designed to handle pmds.  We only get there
via a hackish cast of the pmd to a pte in smaps_pte_range() -- the pte
and pmd formats are not necessarily compatible on each and every
architecture.

Let's add a separate codepath to handle pmds.  follow_trans_huge_pmd()
will detect the huge zero page for us.

We need a pmd_dirty() helper to do this properly.  The patch adds it to
the THP-enabled architectures which don't yet have one.
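The misaccounting is easy to observe: populate a pmd-sized anonymous
mapping with read faults only, so that THP can back it with the huge
zero page, then look at the mapping's entry in /proc/self/smaps.
Before this patch the huge zero page is charged to Rss/Pss as if it
were a normal page.  A minimal userspace sketch, illustrative only: it
assumes a THP-enabled kernel with an x86_64-style 2M pmd, and since
MADV_HUGEPAGE is just a hint, whether the read fault is actually served
by the huge zero page depends on configuration:

/* smaps-hzp.c: fault a pmd-sized anonymous mapping read-only, then
 * dump /proc/self/smaps to inspect how the mapping is accounted.
 */
#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	size_t len = 2UL << 20;		/* one pmd-sized (2M) mapping */
	volatile char sink;
	char line[256];
	char *p;
	FILE *f;

	p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	madvise(p, len, MADV_HUGEPAGE);	/* hint: back this vma with THP */
	sink = p[0];	/* read fault: may map the huge zero page */

	f = fopen("/proc/self/smaps", "r");
	if (!f) {
		perror("fopen");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);	/* check Rss:/Pss: of the 2M vma */
	fclose(f);
	return 0;
}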
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx>
Reported-by: Fengguang Wu <fengguang.wu@xxxxxxxxx>
Tested-by: Fengwei Yin <yfw.kernel@xxxxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
---

 arch/arm64/include/asm/pgtable.h         |    1 
 arch/powerpc/include/asm/pgtable-ppc64.h |    1 
 arch/sparc/include/asm/pgtable_64.h      |    7 +
 arch/x86/include/asm/pgtable.h           |    5 +
 fs/proc/task_mmu.c                       |  101 +++++++++++++--------
 5 files changed, 79 insertions(+), 36 deletions(-)

diff -puN arch/arm64/include/asm/pgtable.h~mm-fix-huge-zero-page-accounting-in-smaps-report arch/arm64/include/asm/pgtable.h
--- a/arch/arm64/include/asm/pgtable.h~mm-fix-huge-zero-page-accounting-in-smaps-report
+++ a/arch/arm64/include/asm/pgtable.h
@@ -279,6 +279,7 @@ void pmdp_splitting_flush(struct vm_area
 #endif /* CONFIG_HAVE_RCU_TABLE_FREE */
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
+#define pmd_dirty(pmd)		pte_dirty(pmd_pte(pmd))
 #define pmd_young(pmd)		pte_young(pmd_pte(pmd))
 #define pmd_wrprotect(pmd)	pte_pmd(pte_wrprotect(pmd_pte(pmd)))
 #define pmd_mksplitting(pmd)	pte_pmd(pte_mkspecial(pmd_pte(pmd)))
diff -puN arch/powerpc/include/asm/pgtable-ppc64.h~mm-fix-huge-zero-page-accounting-in-smaps-report arch/powerpc/include/asm/pgtable-ppc64.h
--- a/arch/powerpc/include/asm/pgtable-ppc64.h~mm-fix-huge-zero-page-accounting-in-smaps-report
+++ a/arch/powerpc/include/asm/pgtable-ppc64.h
@@ -467,6 +467,7 @@ static inline pte_t *pmdp_ptep(pmd_t *pm
 }
 
 #define pmd_pfn(pmd)		pte_pfn(pmd_pte(pmd))
+#define pmd_dirty(pmd)		pte_dirty(pmd_pte(pmd))
 #define pmd_young(pmd)		pte_young(pmd_pte(pmd))
 #define pmd_mkold(pmd)		pte_pmd(pte_mkold(pmd_pte(pmd)))
 #define pmd_wrprotect(pmd)	pte_pmd(pte_wrprotect(pmd_pte(pmd)))
diff -puN arch/sparc/include/asm/pgtable_64.h~mm-fix-huge-zero-page-accounting-in-smaps-report arch/sparc/include/asm/pgtable_64.h
--- a/arch/sparc/include/asm/pgtable_64.h~mm-fix-huge-zero-page-accounting-in-smaps-report
+++ a/arch/sparc/include/asm/pgtable_64.h
@@ -667,6 +667,13 @@ static inline unsigned long pmd_pfn(pmd_
 }
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline unsigned long pmd_dirty(pmd_t pmd)
+{
+	pte_t pte = __pte(pmd_val(pmd));
+
+	return pte_dirty(pte);
+}
+
 static inline unsigned long pmd_young(pmd_t pmd)
 {
 	pte_t pte = __pte(pmd_val(pmd));
diff -puN arch/x86/include/asm/pgtable.h~mm-fix-huge-zero-page-accounting-in-smaps-report arch/x86/include/asm/pgtable.h
--- a/arch/x86/include/asm/pgtable.h~mm-fix-huge-zero-page-accounting-in-smaps-report
+++ a/arch/x86/include/asm/pgtable.h
@@ -99,6 +99,11 @@ static inline int pte_young(pte_t pte)
 	return pte_flags(pte) & _PAGE_ACCESSED;
 }
 
+static inline int pmd_dirty(pmd_t pmd)
+{
+	return pmd_flags(pmd) & _PAGE_DIRTY;
+}
+
 static inline int pmd_young(pmd_t pmd)
 {
 	return pmd_flags(pmd) & _PAGE_ACCESSED;
diff -puN fs/proc/task_mmu.c~mm-fix-huge-zero-page-accounting-in-smaps-report fs/proc/task_mmu.c
--- a/fs/proc/task_mmu.c~mm-fix-huge-zero-page-accounting-in-smaps-report
+++ a/fs/proc/task_mmu.c
@@ -447,58 +447,88 @@ struct mem_size_stats {
 	u64 pss;
 };
 
+static void smaps_account(struct mem_size_stats *mss, struct page *page,
+		unsigned long size, bool young, bool dirty)
+{
+	int mapcount;
 
-static void smaps_pte_entry(pte_t ptent, unsigned long addr,
-		unsigned long ptent_size, struct mm_walk *walk)
+	if (PageAnon(page))
+		mss->anonymous += size;
+
+	mss->resident += size;
+	/* Accumulate the size in pages that have been accessed. */
+	if (young || PageReferenced(page))
+		mss->referenced += size;
+	mapcount = page_mapcount(page);
+	if (mapcount >= 2) {
+		if (dirty || PageDirty(page))
+			mss->shared_dirty += size;
+		else
+			mss->shared_clean += size;
+		mss->pss += (size << PSS_SHIFT) / mapcount;
+	} else {
+		if (dirty || PageDirty(page))
+			mss->private_dirty += size;
+		else
+			mss->private_clean += size;
+		mss->pss += (size << PSS_SHIFT);
+	}
+}
+
+
+static void smaps_pte_entry(pte_t *pte, unsigned long addr,
+		struct mm_walk *walk)
 {
 	struct mem_size_stats *mss = walk->private;
 	struct vm_area_struct *vma = mss->vma;
 	pgoff_t pgoff = linear_page_index(vma, addr);
 	struct page *page = NULL;
-	int mapcount;
 
-	if (pte_present(ptent)) {
-		page = vm_normal_page(vma, addr, ptent);
-	} else if (is_swap_pte(ptent)) {
-		swp_entry_t swpent = pte_to_swp_entry(ptent);
+	if (pte_present(*pte)) {
+		page = vm_normal_page(vma, addr, *pte);
+	} else if (is_swap_pte(*pte)) {
+		swp_entry_t swpent = pte_to_swp_entry(*pte);
 
 		if (!non_swap_entry(swpent))
-			mss->swap += ptent_size;
+			mss->swap += PAGE_SIZE;
 		else if (is_migration_entry(swpent))
 			page = migration_entry_to_page(swpent);
-	} else if (pte_file(ptent)) {
-		if (pte_to_pgoff(ptent) != pgoff)
-			mss->nonlinear += ptent_size;
+	} else if (pte_file(*pte)) {
+		if (pte_to_pgoff(*pte) != pgoff)
+			mss->nonlinear += PAGE_SIZE;
 	}
 
 	if (!page)
 		return;
 
-	if (PageAnon(page))
-		mss->anonymous += ptent_size;
-
 	if (page->index != pgoff)
-		mss->nonlinear += ptent_size;
+		mss->nonlinear += PAGE_SIZE;
 
-	mss->resident += ptent_size;
-	/* Accumulate the size in pages that have been accessed. */
-	if (pte_young(ptent) || PageReferenced(page))
-		mss->referenced += ptent_size;
-	mapcount = page_mapcount(page);
-	if (mapcount >= 2) {
-		if (pte_dirty(ptent) || PageDirty(page))
-			mss->shared_dirty += ptent_size;
-		else
-			mss->shared_clean += ptent_size;
-		mss->pss += (ptent_size << PSS_SHIFT) / mapcount;
-	} else {
-		if (pte_dirty(ptent) || PageDirty(page))
-			mss->private_dirty += ptent_size;
-		else
-			mss->private_clean += ptent_size;
-		mss->pss += (ptent_size << PSS_SHIFT);
-	}
+	smaps_account(mss, page, PAGE_SIZE, pte_young(*pte), pte_dirty(*pte));
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
+		struct mm_walk *walk)
+{
+	struct mem_size_stats *mss = walk->private;
+	struct vm_area_struct *vma = mss->vma;
+	struct page *page;
+
+	/* FOLL_DUMP will return -EFAULT on huge zero page */
+	page = follow_trans_huge_pmd(vma, addr, pmd, FOLL_DUMP);
+	if (IS_ERR_OR_NULL(page))
+		return;
+	mss->anonymous_thp += HPAGE_PMD_SIZE;
+	smaps_account(mss, page, HPAGE_PMD_SIZE,
+			pmd_young(*pmd), pmd_dirty(*pmd));
+}
+#else
+static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
+		struct mm_walk *walk)
+{
 }
+#endif
 
 static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 			struct mm_walk *walk)
@@ -509,9 +539,8 @@ static int smaps_pte_range(pmd_t *pmd, u
 	spinlock_t *ptl;
 
 	if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
-		smaps_pte_entry(*(pte_t *)pmd, addr, HPAGE_PMD_SIZE, walk);
+		smaps_pmd_entry(pmd, addr, walk);
 		spin_unlock(ptl);
-		mss->anonymous_thp += HPAGE_PMD_SIZE;
 		return 0;
 	}
 
@@ -524,7 +553,7 @@ static int smaps_pte_range(pmd_t *pmd, u
 	 */
 	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
 	for (; addr != end; pte++, addr += PAGE_SIZE)
-		smaps_pte_entry(*pte, addr, PAGE_SIZE, walk);
+		smaps_pte_entry(pte, addr, walk);
 	pte_unmap_unlock(pte - 1, ptl);
 	cond_resched();
 	return 0;
_
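For reference, the Pss bookkeeping that smaps_account() centralizes is
fixed-point arithmetic: each mapper is charged size/mapcount, but the
size is first scaled up by PSS_SHIFT so that repeated small divisions
do not lose precision, and the total is scaled back down when the
report is printed.  A userspace sketch of the same arithmetic; the
PSS_SHIFT value mirrors fs/proc/task_mmu.c, while the 2M size and the
mapcount of 3 are made-up example inputs:

/* pss-demo.c: illustrate the fixed-point Pss arithmetic used by
 * smaps_account() above.  Not kernel code.
 */
#include <stdio.h>
#include <stdint.h>

#define PSS_SHIFT 12	/* same fixed-point shift as fs/proc/task_mmu.c */

int main(void)
{
	uint64_t size = 2ULL << 20;	/* one 2M huge page */
	int mapcount = 3;		/* shared by three processes */
	uint64_t pss = 0;

	/* Accumulate in units of 1/4096th of a byte, as mss->pss does,
	 * so the truncation from the division stays sub-byte. */
	pss += (size << PSS_SHIFT) / mapcount;

	/* The smaps report converts back to bytes at print time. */
	printf("Pss contribution: %llu bytes\n",
	       (unsigned long long)(pss >> PSS_SHIFT));
	return 0;
}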
Patches currently in -mm which might be from kirill.shutemov@xxxxxxxxxxxxxxx are

origin.patch
mm-verify-compound-order-when-freeing-a-page.patch
thp-do-not-mark-zero-page-pmd-write-protected-explicitly.patch
mremap-take-anon_vma-lock-in-shared-mode.patch
mm-fix-huge-zero-page-accounting-in-smaps-report.patch
mm-introduce-do_shared_fault-and-drop-do_fault-fix-fix.patch
do_shared_fault-check-that-mmap_sem-is-held.patch
mm-replace-remap_file_pages-syscall-with-emulation.patch

--
To unsubscribe from this list: send the line "unsubscribe mm-commits" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html