This is a note to let you know that I've just added the patch titled mm/mprotect: do not flush when not required architecturally to the 5.15-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary The filename of the patch is: mm-mprotect-do-not-flush-when-not-required-architect.patch and it can be found in the queue-5.15 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable@xxxxxxxxxxxxxxx> know about it. commit 2031c117202f5d2e11b95194e0012d36553e6e78 Author: Nadav Amit <nadav.amit@xxxxxxxxx> Date: Mon May 9 18:20:50 2022 -0700 mm/mprotect: do not flush when not required architecturally [ Upstream commit c9fe66560bf2dc7d109754414e309888cb8c9ba9 ] Currently, using mprotect() to unprotect a memory region or uffd to unprotect a memory region causes a TLB flush. However, in such cases the PTE is often not modified (i.e., remain RO) and therefore not TLB flush is needed. Add an arch-specific pte_needs_flush() which tells whether a TLB flush is needed based on the old PTE and the new one. Implement an x86 pte_needs_flush(). Always flush the TLB when it is architecturally needed even when skipping a TLB flush might only result in a spurious page-faults by skipping the flush. Even with such conservative manner, we can in the future further refine the checks to test whether a PTE is present by only considering the architectural _PAGE_PRESENT flag instead of {pte|pmd}_preesnt(). For not be careful and use the latter. Link: https://lkml.kernel.org/r/20220401180821.1986781-3-namit@xxxxxxxxxx Signed-off-by: Nadav Amit <namit@xxxxxxxxxx> Cc: Andrea Arcangeli <aarcange@xxxxxxxxxx> Cc: Andy Lutomirski <luto@xxxxxxxxxx> Cc: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx> Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx> Cc: Will Deacon <will@xxxxxxxxxx> Cc: Yu Zhao <yuzhao@xxxxxxxxxx> Cc: Nick Piggin <npiggin@xxxxxxxxx> Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx> Cc: Peter Xu <peterx@xxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> Stable-dep-of: 3a5a8d343e1c ("mm: fix race between __split_huge_pmd_locked() and GUP-fast") Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx> diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 28e59576c75be..de9e3c635618e 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -110,9 +110,11 @@ #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) #define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX) #define _PAGE_DEVMAP (_AT(u64, 1) << _PAGE_BIT_DEVMAP) +#define _PAGE_SOFTW4 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW4) #else #define _PAGE_NX (_AT(pteval_t, 0)) #define _PAGE_DEVMAP (_AT(pteval_t, 0)) +#define _PAGE_SOFTW4 (_AT(pteval_t, 0)) #endif #define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index b587a9ee9cb25..8be1ff9081728 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -259,6 +259,103 @@ static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch, extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch); +static inline bool pte_flags_need_flush(unsigned long oldflags, + unsigned long newflags, + bool ignore_access) +{ + /* + * Flags that require a flush when cleared but not when they are set. + * Only include flags that would not trigger spurious page-faults. + * Non-present entries are not cached. Hardware would set the + * dirty/access bit if needed without a fault. + */ + const pteval_t flush_on_clear = _PAGE_DIRTY | _PAGE_PRESENT | + _PAGE_ACCESSED; + const pteval_t software_flags = _PAGE_SOFTW1 | _PAGE_SOFTW2 | + _PAGE_SOFTW3 | _PAGE_SOFTW4; + const pteval_t flush_on_change = _PAGE_RW | _PAGE_USER | _PAGE_PWT | + _PAGE_PCD | _PAGE_PSE | _PAGE_GLOBAL | _PAGE_PAT | + _PAGE_PAT_LARGE | _PAGE_PKEY_BIT0 | _PAGE_PKEY_BIT1 | + _PAGE_PKEY_BIT2 | _PAGE_PKEY_BIT3 | _PAGE_NX; + unsigned long diff = oldflags ^ newflags; + + BUILD_BUG_ON(flush_on_clear & software_flags); + BUILD_BUG_ON(flush_on_clear & flush_on_change); + BUILD_BUG_ON(flush_on_change & software_flags); + + /* Ignore software flags */ + diff &= ~software_flags; + + if (ignore_access) + diff &= ~_PAGE_ACCESSED; + + /* + * Did any of the 'flush_on_clear' flags was clleared set from between + * 'oldflags' and 'newflags'? + */ + if (diff & oldflags & flush_on_clear) + return true; + + /* Flush on modified flags. */ + if (diff & flush_on_change) + return true; + + /* Ensure there are no flags that were left behind */ + if (IS_ENABLED(CONFIG_DEBUG_VM) && + (diff & ~(flush_on_clear | software_flags | flush_on_change))) { + VM_WARN_ON_ONCE(1); + return true; + } + + return false; +} + +/* + * pte_needs_flush() checks whether permissions were demoted and require a + * flush. It should only be used for userspace PTEs. + */ +static inline bool pte_needs_flush(pte_t oldpte, pte_t newpte) +{ + /* !PRESENT -> * ; no need for flush */ + if (!(pte_flags(oldpte) & _PAGE_PRESENT)) + return false; + + /* PFN changed ; needs flush */ + if (pte_pfn(oldpte) != pte_pfn(newpte)) + return true; + + /* + * check PTE flags; ignore access-bit; see comment in + * ptep_clear_flush_young(). + */ + return pte_flags_need_flush(pte_flags(oldpte), pte_flags(newpte), + true); +} +#define pte_needs_flush pte_needs_flush + +/* + * huge_pmd_needs_flush() checks whether permissions were demoted and require a + * flush. It should only be used for userspace huge PMDs. + */ +static inline bool huge_pmd_needs_flush(pmd_t oldpmd, pmd_t newpmd) +{ + /* !PRESENT -> * ; no need for flush */ + if (!(pmd_flags(oldpmd) & _PAGE_PRESENT)) + return false; + + /* PFN changed ; needs flush */ + if (pmd_pfn(oldpmd) != pmd_pfn(newpmd)) + return true; + + /* + * check PMD flags; do not ignore access-bit; see + * pmdp_clear_flush_young(). + */ + return pte_flags_need_flush(pmd_flags(oldpmd), pmd_flags(newpmd), + false); +} +#define huge_pmd_needs_flush huge_pmd_needs_flush + #endif /* !MODULE */ #endif /* _ASM_X86_TLBFLUSH_H */ diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index c99710b3027a0..7afde1eff2398 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -662,6 +662,20 @@ static inline void tlb_flush_p4d_range(struct mmu_gather *tlb, } while (0) #endif +#ifndef pte_needs_flush +static inline bool pte_needs_flush(pte_t oldpte, pte_t newpte) +{ + return true; +} +#endif + +#ifndef huge_pmd_needs_flush +static inline bool huge_pmd_needs_flush(pmd_t oldpmd, pmd_t newpmd) +{ + return true; +} +#endif + #endif /* CONFIG_MMU */ #endif /* _ASM_GENERIC__TLB_H */ diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 661dd29642ebc..8ab6316d85391 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1726,7 +1726,7 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, { struct mm_struct *mm = vma->vm_mm; spinlock_t *ptl; - pmd_t entry; + pmd_t oldpmd, entry; bool preserve_write; int ret; bool prot_numa = cp_flags & MM_CP_PROT_NUMA; @@ -1801,9 +1801,9 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, * pmdp_invalidate() is required to make sure we don't miss * dirty/young flags set by hardware. */ - entry = pmdp_invalidate(vma, addr, pmd); + oldpmd = pmdp_invalidate(vma, addr, pmd); - entry = pmd_modify(entry, newprot); + entry = pmd_modify(oldpmd, newprot); if (preserve_write) entry = pmd_mk_savedwrite(entry); if (uffd_wp) { @@ -1820,7 +1820,8 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, ret = HPAGE_PMD_NR; set_pmd_at(mm, addr, pmd, entry); - tlb_flush_pmd_range(tlb, addr, HPAGE_PMD_SIZE); + if (huge_pmd_needs_flush(oldpmd, entry)) + tlb_flush_pmd_range(tlb, addr, HPAGE_PMD_SIZE); BUG_ON(vma_is_anonymous(vma) && !preserve_write && pmd_write(entry)); unlock: diff --git a/mm/mprotect.c b/mm/mprotect.c index fe1196be9ca28..09c5c448b9e7c 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -141,7 +141,8 @@ static unsigned long change_pte_range(struct mmu_gather *tlb, ptent = pte_mkwrite(ptent); } ptep_modify_prot_commit(vma, addr, pte, oldpte, ptent); - tlb_flush_pte_range(tlb, addr, PAGE_SIZE); + if (pte_needs_flush(oldpte, ptent)) + tlb_flush_pte_range(tlb, addr, PAGE_SIZE); pages++; } else if (is_swap_pte(oldpte)) { swp_entry_t entry = pte_to_swp_entry(oldpte);