Hi Nick, On Sun, 20 Dec 2020, Nicholas Piggin wrote: > Similarly to the previous patch, this tries to optimise dirty/accessed > bits in ptes to avoid access costs of hardware setting them. > > This tidies up a few last cases where dirty/accessed faults can be seen, > and subsumes the pte_sw_mkyoung helper -- it's not just architectures > with explicit software dirty/accessed bits that take expensive faults to > modify ptes. > > The vast majority of the remaining dirty/accessed faults on kbuild > workloads after this patch are from NUMA migration, due to > remove_migration_pte inserting old/clean ptes. Are you sure about this patch? It looks wrong to me: because isn't _PAGE_ACCESSED (young) already included in the vm_page_prot __S001 etc? I haven't checked all instances below, but in general, that's why the existing code tends not to bother to mkyoung, but does sometimes mkold (admittedly confusing). A quick check on x86 and powerpc looks like they do have _PAGE_ACCESSED in there; and IIRC that's true, or ought to be true, of all architectures. Maybe not mips, which I see you have singled out: I remember going round this loop a few months ago with mips, where strange changes were being proposed to compensate for not having that bit in their vm_page_prot. I didn't follow through to see how that ended up, but I did suggest mips needed to follow the same convention as the other architectures. Hugh > > Signed-off-by: Nicholas Piggin <npiggin@xxxxxxxxx> > --- > arch/mips/include/asm/pgtable.h | 2 -- > include/linux/pgtable.h | 16 ---------------- > mm/huge_memory.c | 4 ++-- > mm/memory.c | 14 +++++++------- > mm/migrate.c | 1 + > mm/shmem.c | 1 + > mm/userfaultfd.c | 2 +- > 7 files changed, 12 insertions(+), 28 deletions(-) > > diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h > index 4f9c37616d42..3275495adccb 100644 > --- a/arch/mips/include/asm/pgtable.h > +++ b/arch/mips/include/asm/pgtable.h > @@ -406,8 +406,6 @@ static inline pte_t pte_mkyoung(pte_t pte) > return pte; > } > > -#define pte_sw_mkyoung pte_mkyoung > - > #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT > static inline int pte_huge(pte_t pte) { return pte_val(pte) & _PAGE_HUGE; } > > diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h > index 8fcdfa52eb4b..70d04931dff4 100644 > --- a/include/linux/pgtable.h > +++ b/include/linux/pgtable.h > @@ -424,22 +424,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres > } > #endif > > -/* > - * On some architectures hardware does not set page access bit when accessing > - * memory page, it is responsibilty of software setting this bit. It brings > - * out extra page fault penalty to track page access bit. For optimization page > - * access bit can be set during all page fault flow on these arches. > - * To be differentiate with macro pte_mkyoung, this macro is used on platforms > - * where software maintains page access bit. > - */ > -#ifndef pte_sw_mkyoung > -static inline pte_t pte_sw_mkyoung(pte_t pte) > -{ > - return pte; > -} > -#define pte_sw_mkyoung pte_sw_mkyoung > -#endif > - > #ifndef pte_savedwrite > #define pte_savedwrite pte_write > #endif > diff --git a/mm/huge_memory.c b/mm/huge_memory.c > index f2ca0326b5af..f6719312dc27 100644 > --- a/mm/huge_memory.c > +++ b/mm/huge_memory.c > @@ -2151,8 +2151,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, > entry = maybe_mkwrite(entry, vma); > if (!write) > entry = pte_wrprotect(entry); > - if (!young) > - entry = pte_mkold(entry); > + if (young) > + entry = pte_mkyoung(entry); > if (soft_dirty) > entry = pte_mksoft_dirty(entry); > if (uffd_wp) > diff --git a/mm/memory.c b/mm/memory.c > index dd1f364d8ca3..4cebba596660 100644 > --- a/mm/memory.c > +++ b/mm/memory.c > @@ -1639,7 +1639,7 @@ static int insert_page_into_pte_locked(struct mm_struct *mm, pte_t *pte, > get_page(page); > inc_mm_counter_fast(mm, mm_counter_file(page)); > page_add_file_rmap(page, false); > - set_pte_at(mm, addr, pte, mk_pte(page, prot)); > + set_pte_at(mm, addr, pte, pte_mkyoung(mk_pte(page, prot))); > return 0; > } > > @@ -1954,10 +1954,9 @@ static vm_fault_t insert_pfn(struct vm_area_struct *vma, unsigned long addr, > else > entry = pte_mkspecial(pfn_t_pte(pfn, prot)); > > - if (mkwrite) { > - entry = pte_mkyoung(entry); > + entry = pte_mkyoung(entry); > + if (mkwrite) > entry = maybe_mkwrite(pte_mkdirty(entry), vma); > - } > > set_pte_at(mm, addr, pte, entry); > update_mmu_cache(vma, addr, pte); /* XXX: why not for insert_page? */ > @@ -2889,7 +2888,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) > } > flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte)); > entry = mk_pte(new_page, vma->vm_page_prot); > - entry = pte_sw_mkyoung(entry); > + entry = pte_mkyoung(entry); > entry = maybe_mkwrite(pte_mkdirty(entry), vma); > /* > * Clear the pte entry and flush it first, before updating the > @@ -3402,6 +3401,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) > inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); > dec_mm_counter_fast(vma->vm_mm, MM_SWAPENTS); > pte = mk_pte(page, vma->vm_page_prot); > + pte = pte_mkyoung(pte); > if ((vmf->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) { > pte = maybe_mkwrite(pte_mkdirty(pte), vma); > vmf->flags &= ~FAULT_FLAG_WRITE; > @@ -3545,7 +3545,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf) > __SetPageUptodate(page); > > entry = mk_pte(page, vma->vm_page_prot); > - entry = pte_sw_mkyoung(entry); > + entry = pte_mkyoung(entry); > if (vma->vm_flags & VM_WRITE) > entry = pte_mkwrite(pte_mkdirty(entry)); > > @@ -3821,7 +3821,7 @@ vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct page *page) > > flush_icache_page(vma, page); > entry = mk_pte(page, vma->vm_page_prot); > - entry = pte_sw_mkyoung(entry); > + entry = pte_mkyoung(entry); > if (write) > entry = maybe_mkwrite(pte_mkdirty(entry), vma); > /* copy-on-write page */ > diff --git a/mm/migrate.c b/mm/migrate.c > index ee5e612b4cd8..d33b2bfc846b 100644 > --- a/mm/migrate.c > +++ b/mm/migrate.c > @@ -2963,6 +2963,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate, > } > } else { > entry = mk_pte(page, vma->vm_page_prot); > + entry = pte_mkyoung(entry); > if (vma->vm_flags & VM_WRITE) > entry = pte_mkwrite(pte_mkdirty(entry)); > } > diff --git a/mm/shmem.c b/mm/shmem.c > index 7c6b6d8f6c39..4f23b16d6baf 100644 > --- a/mm/shmem.c > +++ b/mm/shmem.c > @@ -2420,6 +2420,7 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm, > goto out_release; > > _dst_pte = mk_pte(page, dst_vma->vm_page_prot); > + _dst_pte = pte_mkyoung(_dst_pte); > if (dst_vma->vm_flags & VM_WRITE) > _dst_pte = pte_mkwrite(pte_mkdirty(_dst_pte)); > else { > diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c > index 9a3d451402d7..56c44aa06a7e 100644 > --- a/mm/userfaultfd.c > +++ b/mm/userfaultfd.c > @@ -99,7 +99,7 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm, > if (mem_cgroup_charge(page, dst_mm, GFP_KERNEL)) > goto out_release; > > - _dst_pte = pte_mkdirty(mk_pte(page, dst_vma->vm_page_prot)); > + _dst_pte = pte_mkdirty(pte_mkyoung(mk_pte(page, dst_vma->vm_page_prot))); > if (dst_vma->vm_flags & VM_WRITE) { > if (wp_copy) > _dst_pte = pte_mkuffd_wp(_dst_pte); > -- > 2.23.0