From: "Kirill A. Shutemov" <kirill.shutemov@xxxxxxxxxxxxxxx> Subject: mm: use updated pmdp_invalidate() interface to track dirty/accessed bits Use the modified pmdp_invalidate() that returns the previous value of pmd to transfer dirty and accessed bits. Link: http://lkml.kernel.org/r/20171213105756.69879-12-kirill.shutemov@xxxxxxxxxxxxxxx Signed-off-by: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx> Cc: Andrea Arcangeli <aarcange@xxxxxxxxxx> Cc: Aneesh Kumar K.V <aneesh.kumar@xxxxxxxxxxxxxxxxxx> Cc: Catalin Marinas <catalin.marinas@xxxxxxx> Cc: David Daney <david.daney@xxxxxxxxxx> Cc: David Miller <davem@xxxxxxxxxxxxx> Cc: H. Peter Anvin <hpa@xxxxxxxxx> Cc: Hugh Dickins <hughd@xxxxxxxxxx> Cc: Ingo Molnar <mingo@xxxxxxxxxx> Cc: Martin Schwidefsky <schwidefsky@xxxxxxxxxx> Cc: Michal Hocko <mhocko@xxxxxxxxxx> Cc: Nitin Gupta <nitin.m.gupta@xxxxxxxxxx> Cc: Ralf Baechle <ralf@xxxxxxxxxxxxxx> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx> Cc: Vineet Gupta <vgupta@xxxxxxxxxxxx> Cc: Vlastimil Babka <vbabka@xxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- fs/proc/task_mmu.c | 8 ++++---- mm/huge_memory.c | 29 ++++++++++++----------------- 2 files changed, 16 insertions(+), 21 deletions(-) diff -puN fs/proc/task_mmu.c~mm-use-updated-pmdp_invalidate-interface-to-track-dirty-accessed-bits fs/proc/task_mmu.c --- a/fs/proc/task_mmu.c~mm-use-updated-pmdp_invalidate-interface-to-track-dirty-accessed-bits +++ a/fs/proc/task_mmu.c @@ -982,14 +982,14 @@ static inline void clear_soft_dirty(stru static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp) { - pmd_t pmd = *pmdp; + pmd_t old, pmd = *pmdp; if (pmd_present(pmd)) { /* See comment in change_huge_pmd() */ - pmdp_invalidate(vma, addr, pmdp); - if (pmd_dirty(*pmdp)) + old = pmdp_invalidate(vma, addr, pmdp); + if (pmd_dirty(old)) pmd = pmd_mkdirty(pmd); - if (pmd_young(*pmdp)) + if (pmd_young(old)) pmd = pmd_mkyoung(pmd); pmd = pmd_wrprotect(pmd); diff -puN 
mm/huge_memory.c~mm-use-updated-pmdp_invalidate-interface-to-track-dirty-accessed-bits mm/huge_memory.c --- a/mm/huge_memory.c~mm-use-updated-pmdp_invalidate-interface-to-track-dirty-accessed-bits +++ a/mm/huge_memory.c @@ -1910,17 +1910,7 @@ int change_huge_pmd(struct vm_area_struc * pmdp_invalidate() is required to make sure we don't miss * dirty/young flags set by hardware. */ - entry = *pmd; - pmdp_invalidate(vma, addr, pmd); - - /* - * Recover dirty/young flags. It relies on pmdp_invalidate to not - * corrupt them. - */ - if (pmd_dirty(*pmd)) - entry = pmd_mkdirty(entry); - if (pmd_young(*pmd)) - entry = pmd_mkyoung(entry); + entry = pmdp_invalidate(vma, addr, pmd); entry = pmd_modify(entry, newprot); if (preserve_write) @@ -2073,8 +2063,8 @@ static void __split_huge_pmd_locked(stru struct mm_struct *mm = vma->vm_mm; struct page *page; pgtable_t pgtable; - pmd_t _pmd; - bool young, write, dirty, soft_dirty, pmd_migration = false; + pmd_t old, _pmd; + bool young, write, soft_dirty, pmd_migration = false; unsigned long addr; int i; @@ -2130,7 +2120,6 @@ static void __split_huge_pmd_locked(stru page_ref_add(page, HPAGE_PMD_NR - 1); write = pmd_write(*pmd); young = pmd_young(*pmd); - dirty = pmd_dirty(*pmd); soft_dirty = pmd_soft_dirty(*pmd); pmdp_huge_split_prepare(vma, haddr, pmd); @@ -2160,8 +2149,6 @@ static void __split_huge_pmd_locked(stru if (soft_dirty) entry = pte_mksoft_dirty(entry); } - if (dirty) - SetPageDirty(page + i); pte = pte_offset_map(&_pmd, addr); BUG_ON(!pte_none(*pte)); set_pte_at(mm, addr, pte, entry); @@ -2209,7 +2196,15 @@ static void __split_huge_pmd_locked(stru * for this pmd), then we flush the SMP TLB and finally we write the * non-huge version of the pmd entry with pmd_populate. */ - pmdp_invalidate(vma, haddr, pmd); + old = pmdp_invalidate(vma, haddr, pmd); + + /* + * Transfer dirty bit using value returned by pmd_invalidate() to be + * sure we don't race with CPU that can set the bit under us. 
+ */ + if (pmd_dirty(old)) + SetPageDirty(page); + pmd_populate(mm, pmd, pgtable); if (freeze) { _ -- To unsubscribe from this list: send the line "unsubscribe mm-commits" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html