Similarly to the previous patch, this tries to optimise dirty/accessed
bits in ptes to avoid access costs of hardware setting them.

This tidies up a few last cases where dirty/accessed faults can be seen,
and subsumes the pte_sw_mkyoung helper -- it's not just architectures
with explicit software dirty/accessed bits that take expensive faults to
modify ptes.

The vast majority of the remaining dirty/accessed faults on kbuild
workloads after this patch are from NUMA migration, due to
remove_migration_pte inserting old/clean ptes.

Signed-off-by: Nicholas Piggin <npiggin@xxxxxxxxx>
---
 arch/mips/include/asm/pgtable.h |  2 --
 include/linux/pgtable.h         | 16 ----------------
 mm/huge_memory.c                |  4 ++--
 mm/memory.c                     | 14 +++++++-------
 mm/migrate.c                    |  1 +
 mm/shmem.c                      |  1 +
 mm/userfaultfd.c                |  2 +-
 7 files changed, 12 insertions(+), 28 deletions(-)

diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index 4f9c37616d42..3275495adccb 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -406,8 +406,6 @@ static inline pte_t pte_mkyoung(pte_t pte)
 	return pte;
 }
 
-#define pte_sw_mkyoung	pte_mkyoung
-
 #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
 static inline int pte_huge(pte_t pte)	{ return pte_val(pte) & _PAGE_HUGE; }
 
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 8fcdfa52eb4b..70d04931dff4 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -424,22 +424,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres
 }
 #endif
 
-/*
- * On some architectures hardware does not set page access bit when accessing
- * memory page, it is responsibilty of software setting this bit. It brings
- * out extra page fault penalty to track page access bit. For optimization page
- * access bit can be set during all page fault flow on these arches.
- * To be differentiate with macro pte_mkyoung, this macro is used on platforms
- * where software maintains page access bit.
- */
-#ifndef pte_sw_mkyoung
-static inline pte_t pte_sw_mkyoung(pte_t pte)
-{
-	return pte;
-}
-#define pte_sw_mkyoung	pte_sw_mkyoung
-#endif
-
 #ifndef pte_savedwrite
 #define pte_savedwrite pte_write
 #endif
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f2ca0326b5af..f6719312dc27 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2151,8 +2151,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 			entry = maybe_mkwrite(entry, vma);
 			if (!write)
 				entry = pte_wrprotect(entry);
-			if (!young)
-				entry = pte_mkold(entry);
+			if (young)
+				entry = pte_mkyoung(entry);
 			if (soft_dirty)
 				entry = pte_mksoft_dirty(entry);
 			if (uffd_wp)
diff --git a/mm/memory.c b/mm/memory.c
index dd1f364d8ca3..4cebba596660 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1639,7 +1639,7 @@ static int insert_page_into_pte_locked(struct mm_struct *mm, pte_t *pte,
 	get_page(page);
 	inc_mm_counter_fast(mm, mm_counter_file(page));
 	page_add_file_rmap(page, false);
-	set_pte_at(mm, addr, pte, mk_pte(page, prot));
+	set_pte_at(mm, addr, pte, pte_mkyoung(mk_pte(page, prot)));
 	return 0;
 }
 
@@ -1954,10 +1954,9 @@ static vm_fault_t insert_pfn(struct vm_area_struct *vma, unsigned long addr,
 	else
 		entry = pte_mkspecial(pfn_t_pte(pfn, prot));
 
-	if (mkwrite) {
-		entry = pte_mkyoung(entry);
+	entry = pte_mkyoung(entry);
+	if (mkwrite)
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
-	}
 
 	set_pte_at(mm, addr, pte, entry);
 	update_mmu_cache(vma, addr, pte); /* XXX: why not for insert_page? */
@@ -2889,7 +2888,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
 		}
 		flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
 		entry = mk_pte(new_page, vma->vm_page_prot);
-		entry = pte_sw_mkyoung(entry);
+		entry = pte_mkyoung(entry);
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 		/*
 		 * Clear the pte entry and flush it first, before updating the
@@ -3402,6 +3401,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 	inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
 	dec_mm_counter_fast(vma->vm_mm, MM_SWAPENTS);
 	pte = mk_pte(page, vma->vm_page_prot);
+	pte = pte_mkyoung(pte);
 	if ((vmf->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) {
 		pte = maybe_mkwrite(pte_mkdirty(pte), vma);
 		vmf->flags &= ~FAULT_FLAG_WRITE;
@@ -3545,7 +3545,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
 	__SetPageUptodate(page);
 
 	entry = mk_pte(page, vma->vm_page_prot);
-	entry = pte_sw_mkyoung(entry);
+	entry = pte_mkyoung(entry);
 	if (vma->vm_flags & VM_WRITE)
 		entry = pte_mkwrite(pte_mkdirty(entry));
 
@@ -3821,7 +3821,7 @@ vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct page *page)
 
 	flush_icache_page(vma, page);
 	entry = mk_pte(page, vma->vm_page_prot);
-	entry = pte_sw_mkyoung(entry);
+	entry = pte_mkyoung(entry);
 	if (write)
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 	/* copy-on-write page */
diff --git a/mm/migrate.c b/mm/migrate.c
index ee5e612b4cd8..d33b2bfc846b 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2963,6 +2963,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate,
 		}
 	} else {
 		entry = mk_pte(page, vma->vm_page_prot);
+		entry = pte_mkyoung(entry);
 		if (vma->vm_flags & VM_WRITE)
 			entry = pte_mkwrite(pte_mkdirty(entry));
 	}
diff --git a/mm/shmem.c b/mm/shmem.c
index 7c6b6d8f6c39..4f23b16d6baf 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2420,6 +2420,7 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
 		goto out_release;
 
 	_dst_pte = mk_pte(page, dst_vma->vm_page_prot);
+	_dst_pte = pte_mkyoung(_dst_pte);
 	if (dst_vma->vm_flags & VM_WRITE)
 		_dst_pte = pte_mkwrite(pte_mkdirty(_dst_pte));
 	else {
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 9a3d451402d7..56c44aa06a7e 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -99,7 +99,7 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm,
 	if (mem_cgroup_charge(page, dst_mm, GFP_KERNEL))
 		goto out_release;
 
-	_dst_pte = pte_mkdirty(mk_pte(page, dst_vma->vm_page_prot));
+	_dst_pte = pte_mkdirty(pte_mkyoung(mk_pte(page, dst_vma->vm_page_prot)));
 	if (dst_vma->vm_flags & VM_WRITE) {
 		if (wp_copy)
 			_dst_pte = pte_mkuffd_wp(_dst_pte);
-- 
2.23.0
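
For readers following along, here is a minimal sketch (not part of the patch)
of the pattern the hunks above apply when a fault handler installs a pte:
build the entry, pre-set the accessed bit (and the dirty bit on a write fault)
and only then set_pte_at() it, so hardware or a software assist fault never
has to come back later to set those bits. The wrapper example_install_pte()
is hypothetical; the pgtable helpers it calls are the real kernel ones used
in the patch.

/*
 * Illustrative sketch only -- not from the patch. example_install_pte()
 * is a made-up wrapper; mk_pte/pte_mkyoung/pte_mkdirty/maybe_mkwrite/
 * set_pte_at are the kernel helpers the patch relies on.
 */
static void example_install_pte(struct vm_area_struct *vma, unsigned long addr,
				pte_t *ptep, struct page *page, bool write)
{
	pte_t entry = mk_pte(page, vma->vm_page_prot);

	/* Pre-set the accessed bit so no later fault is taken to set it. */
	entry = pte_mkyoung(entry);

	/*
	 * On a write fault, pre-set dirty (and writable where the vma
	 * permits it) for the same reason.
	 */
	if (write)
		entry = maybe_mkwrite(pte_mkdirty(entry), vma);

	set_pte_at(vma->vm_mm, addr, ptep, entry);
}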