The subject says "riscv:" but the patch touches the core mm code and
several other architectures. Maybe this commit should be split in two:
a first patch with the API change that renames
flush_tlb_fix_spurious_fault() to flush_tlb_fix_spurious_write_fault()
and adds flush_tlb_fix_spurious_read_fault(), including the change in
mm/memory.c, then a second patch with the riscv changes.

On 07/12/2023 at 16:03, Alexandre Ghiti wrote:
> The preventive sfence.vma were emitted because new mappings must be made
> visible to the page table walker, either the uarch caches invalid
> entries or not.
> 
> Actually, there is no need to preventively sfence.vma on new mappings for
> userspace, this should be handled only in the page fault path.
> 
> This allows to drastically reduce the number of sfence.vma emitted:
> 
> * Ubuntu boot to login:
> Before: ~630k sfence.vma
> After: ~200k sfence.vma
> 
> * ltp - mmapstress01
> Before: ~45k
> After: ~6.3k
> 
> * lmbench - lat_pagefault
> Before: ~665k
> After: 832 (!)
> 
> * lmbench - lat_mmap
> Before: ~546k
> After: 718 (!)
> 
> The only issue with the removal of sfence.vma in update_mmu_cache() is
> that on uarchs that cache invalid entries, those won't be invalidated
> until the process takes a fault: so that's an additional fault in those
> cases.
> 
> Signed-off-by: Alexandre Ghiti <alexghiti@xxxxxxxxxxxx>
> ---
>  arch/arm64/include/asm/pgtable.h              |  2 +-
>  arch/mips/include/asm/pgtable.h               |  6 +--
>  arch/powerpc/include/asm/book3s/64/tlbflush.h |  8 ++--
>  arch/riscv/include/asm/pgtable.h              | 43 +++++++++++--------
>  include/linux/pgtable.h                       |  8 +++-
>  mm/memory.c                                   | 12 +++++-
>  6 files changed, 48 insertions(+), 31 deletions(-)

Did you forget mm/pgtable-generic.c?

> 
> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
> index 7f7d9b1df4e5..728f25f529a5 100644
> --- a/arch/arm64/include/asm/pgtable.h
> +++ b/arch/arm64/include/asm/pgtable.h
> @@ -57,7 +57,7 @@ static inline bool arch_thp_swp_supported(void)
>   * fault on one CPU which has been handled concurrently by another CPU
>   * does not need to perform additional invalidation.
>   */
> -#define flush_tlb_fix_spurious_fault(vma, address, ptep) do { } while (0)
> +#define flush_tlb_fix_spurious_write_fault(vma, address, ptep) do { } while (0)

Why do you need to make that change? Nothing about it is explained in
the commit message.
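Coming back to mm/pgtable-generic.c above: the generic
ptep_set_access_flags() fallback there still uses the old name, so I'd
expect a build failure on any architecture that doesn't provide its own
version. If I remember correctly it looks roughly like this (quoting
from memory, not from this patch):

	#ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
	int ptep_set_access_flags(struct vm_area_struct *vma,
				  unsigned long address, pte_t *ptep,
				  pte_t entry, int dirty)
	{
		int changed = !pte_same(ptep_get(ptep), entry);
		if (changed) {
			set_pte_at(vma->vm_mm, address, ptep, entry);
			/* Still the old name, needs the same rename. */
			flush_tlb_fix_spurious_fault(vma, address, ptep);
		}
		return changed;
	}
	#endif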
> 
>  /*
>   * ZERO_PAGE is a global shared page that is always zero: used
> diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
> index 430b208c0130..84439fe6ed29 100644
> --- a/arch/mips/include/asm/pgtable.h
> +++ b/arch/mips/include/asm/pgtable.h
> @@ -478,9 +478,9 @@ static inline pgprot_t pgprot_writecombine(pgprot_t _prot)
>  	return __pgprot(prot);
>  }
>  
> -static inline void flush_tlb_fix_spurious_fault(struct vm_area_struct *vma,
> -						unsigned long address,
> -						pte_t *ptep)
> +static inline void flush_tlb_fix_spurious_write_fault(struct vm_area_struct *vma,
> +						      unsigned long address,
> +						      pte_t *ptep)
>  {
>  }
>  
> diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h
> index 1950c1b825b4..7166d56f90db 100644
> --- a/arch/powerpc/include/asm/book3s/64/tlbflush.h
> +++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h
> @@ -128,10 +128,10 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
>  #define flush_tlb_page(vma, addr) local_flush_tlb_page(vma, addr)
>  #endif /* CONFIG_SMP */
>  
> -#define flush_tlb_fix_spurious_fault flush_tlb_fix_spurious_fault
> -static inline void flush_tlb_fix_spurious_fault(struct vm_area_struct *vma,
> -						unsigned long address,
> -						pte_t *ptep)
> +#define flush_tlb_fix_spurious_write_fault flush_tlb_fix_spurious_write_fault
> +static inline void flush_tlb_fix_spurious_write_fault(struct vm_area_struct *vma,
> +						      unsigned long address,
> +						      pte_t *ptep)
>  {
>  	/*
>  	 * Book3S 64 does not require spurious fault flushes because the PTE
> diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
> index b2ba3f79cfe9..89aa5650f104 100644
> --- a/arch/riscv/include/asm/pgtable.h
> +++ b/arch/riscv/include/asm/pgtable.h
> @@ -472,28 +472,20 @@ static inline void update_mmu_cache_range(struct vm_fault *vmf,
>  		struct vm_area_struct *vma, unsigned long address,
>  		pte_t *ptep, unsigned int nr)
>  {
> -	/*
> -	 * The kernel assumes that TLBs don't cache invalid entries, but
> -	 * in RISC-V, SFENCE.VMA specifies an ordering constraint, not a
> -	 * cache flush; it is necessary even after writing invalid entries.
> -	 * Relying on flush_tlb_fix_spurious_fault would suffice, but
> -	 * the extra traps reduce performance. So, eagerly SFENCE.VMA.
> -	 */
> -	while (nr--)
> -		local_flush_tlb_page(address + nr * PAGE_SIZE);
>  }
>  #define update_mmu_cache(vma, addr, ptep) \
>  	update_mmu_cache_range(NULL, vma, addr, ptep, 1)
>  
>  #define __HAVE_ARCH_UPDATE_MMU_TLB
> -#define update_mmu_tlb update_mmu_cache
> +static inline void update_mmu_tlb(struct vm_area_struct *vma,
> +				  unsigned long address, pte_t *ptep)
> +{
> +	flush_tlb_range(vma, address, address + PAGE_SIZE);
> +}
>  
>  static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
>  		unsigned long address, pmd_t *pmdp)
>  {
> -	pte_t *ptep = (pte_t *)pmdp;
> -
> -	update_mmu_cache(vma, address, ptep);
>  }
>  
>  #define __HAVE_ARCH_PTE_SAME
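On the update_mmu_tlb() change above: flush_tlb_range() over exactly
one page should be equivalent to flush_tlb_page(), and the latter
reads more directly. Maybe (untested sketch):

	static inline void update_mmu_tlb(struct vm_area_struct *vma,
					  unsigned long address, pte_t *ptep)
	{
		flush_tlb_page(vma, address);
	}

Not a big deal either way.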
> @@ -548,13 +540,26 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma,
>  				unsigned long address, pte_t *ptep,
>  				pte_t entry, int dirty)
>  {
> -	if (!pte_same(*ptep, entry))
> +	if (!pte_same(*ptep, entry)) {
>  		__set_pte_at(ptep, entry);
> -	/*
> -	 * update_mmu_cache will unconditionally execute, handling both
> -	 * the case that the PTE changed and the spurious fault case.
> -	 */
> -	return true;
> +		/* Here only not svadu is impacted */
> +		flush_tlb_page(vma, address);
> +		return true;
> +	}
> +
> +	return false;
> +}
> +
> +extern u64 nr_sfence_vma_handle_exception;
> +extern bool tlb_caching_invalid_entries;
> +
> +#define flush_tlb_fix_spurious_read_fault flush_tlb_fix_spurious_read_fault
> +static inline void flush_tlb_fix_spurious_read_fault(struct vm_area_struct *vma,
> +						     unsigned long address,
> +						     pte_t *ptep)
> +{
> +	if (tlb_caching_invalid_entries)
> +		flush_tlb_page(vma, address);
>  }
>  
>  #define __HAVE_ARCH_PTEP_GET_AND_CLEAR
> diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
> index af7639c3b0a3..7abaf42ef612 100644
> --- a/include/linux/pgtable.h
> +++ b/include/linux/pgtable.h
> @@ -931,8 +931,12 @@ static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio)
>  # define pte_accessible(mm, pte)	((void)(pte), 1)
>  #endif
>  
> -#ifndef flush_tlb_fix_spurious_fault
> -#define flush_tlb_fix_spurious_fault(vma, address, ptep) flush_tlb_page(vma, address)
> +#ifndef flush_tlb_fix_spurious_write_fault
> +#define flush_tlb_fix_spurious_write_fault(vma, address, ptep) flush_tlb_page(vma, address)
> +#endif
> +
> +#ifndef flush_tlb_fix_spurious_read_fault
> +#define flush_tlb_fix_spurious_read_fault(vma, address, ptep)
>  #endif
>  
>  /*
> diff --git a/mm/memory.c b/mm/memory.c
> index 517221f01303..5cb0ccf0c03f 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -5014,8 +5014,16 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
>  		 * with threads.
>  		 */
>  		if (vmf->flags & FAULT_FLAG_WRITE)
> -			flush_tlb_fix_spurious_fault(vmf->vma, vmf->address,
> -						     vmf->pte);
> +			flush_tlb_fix_spurious_write_fault(vmf->vma, vmf->address,
> +							   vmf->pte);
> +		else
> +			/*
> +			 * With the pte_same(ptep_get(vmf->pte), entry) check
> +			 * that calls update_mmu_tlb() above, multiple threads
> +			 * faulting at the same time won't get there.
> +			 */
> +			flush_tlb_fix_spurious_read_fault(vmf->vma, vmf->address,
> +							  vmf->pte);
>  	}
>  unlock:
>  	pte_unmap_unlock(vmf->pte, vmf->ptl);
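One last nit, on the mm/memory.c hunk: kernel coding style usually asks
for braces on both branches once one of them spans several lines, and
"won't get there" in the new comment reads oddly. Maybe something like
(untested, just to illustrate the wording):

	if (vmf->flags & FAULT_FLAG_WRITE) {
		flush_tlb_fix_spurious_write_fault(vmf->vma, vmf->address,
						   vmf->pte);
	} else {
		/*
		 * Read faults that raced with a concurrent PTE update
		 * were already filtered out by the pte_same() check
		 * above (which calls update_mmu_tlb()), so only a uarch
		 * that caches invalid entries can still get here.
		 */
		flush_tlb_fix_spurious_read_fault(vmf->vma, vmf->address,
						  vmf->pte);
	}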