From: Nadav Amit <namit@xxxxxxxxxx> Introduce relaxed TLB flushes in x86. When protection is removed from PTEs (i.e., PTEs become writeable or executable), relaxed TLB flushes would be used. Relaxed TLB flushes do flush the local TLB, but do not flush remote TLBs. If later a spurious page-fault is encountered, and the local TLB generation is found to be out of sync with the mm's TLB generation, a full TLB flush takes place to prevent further spurious page-faults from occurring. Cc: Andrea Arcangeli <aarcange@xxxxxxxxxx> Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx> Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> Cc: Andy Lutomirski <luto@xxxxxxxxxx> Cc: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx> Cc: David Hildenbrand <david@xxxxxxxxxx> Cc: Peter Xu <peterx@xxxxxxxxxx> Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx> Cc: Will Deacon <will@xxxxxxxxxx> Cc: Yu Zhao <yuzhao@xxxxxxxxxx> Cc: Nick Piggin <npiggin@xxxxxxxxx> Signed-off-by: Nadav Amit <namit@xxxxxxxxxx> --- arch/x86/include/asm/tlb.h | 3 ++- arch/x86/include/asm/tlbflush.h | 9 +++++---- arch/x86/kernel/alternative.c | 2 +- arch/x86/kernel/ldt.c | 3 ++- arch/x86/mm/tlb.c | 4 ++-- 5 files changed, 12 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h index 1bfe979bb9bc..51c85136f9a8 100644 --- a/arch/x86/include/asm/tlb.h +++ b/arch/x86/include/asm/tlb.h @@ -20,7 +20,8 @@ static inline void tlb_flush(struct mmu_gather *tlb) end = tlb->end; } - flush_tlb_mm_range(tlb->mm, start, end, stride_shift, tlb->freed_tables); + flush_tlb_mm_range(tlb->mm, start, end, stride_shift, tlb->freed_tables, + tlb->strict); } /* diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 77d4810e5a5d..230cd1d24fe6 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -220,23 +220,24 @@ void flush_tlb_multi(const struct cpumask *cpumask, #endif #define flush_tlb_mm(mm) \ - flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL, true) + flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL, true, true) #define flush_tlb_range(vma, start, end) \ flush_tlb_mm_range((vma)->vm_mm, start, end, \ ((vma)->vm_flags & VM_HUGETLB) \ ? huge_page_shift(hstate_vma(vma)) \ - : PAGE_SHIFT, false) + : PAGE_SHIFT, false, true) extern void flush_tlb_all(void); extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, unsigned long end, unsigned int stride_shift, - bool freed_tables); + bool freed_tables, bool strict); extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a) { - flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, PAGE_SHIFT, false); + flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, PAGE_SHIFT, false, + true); } static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index e257f6c80372..48945a47fd76 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -1099,7 +1099,7 @@ static void *__text_poke(text_poke_f func, void *addr, const void *src, size_t l */ flush_tlb_mm_range(poking_mm, poking_addr, poking_addr + (cross_page_boundary ? 2 : 1) * PAGE_SIZE, - PAGE_SHIFT, false); + PAGE_SHIFT, false, true); if (func == text_poke_memcpy) { /* diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 525876e7b9f4..7c7bc97324bc 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -372,7 +372,8 @@ static void unmap_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt) } va = (unsigned long)ldt_slot_va(ldt->slot); - flush_tlb_mm_range(mm, va, va + nr_pages * PAGE_SIZE, PAGE_SHIFT, false); + flush_tlb_mm_range(mm, va, va + nr_pages * PAGE_SIZE, PAGE_SHIFT, false, + true); } #else /* !CONFIG_PAGE_TABLE_ISOLATION */ diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index ff3bcc55435e..ec5033d28a97 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -974,7 +974,7 @@ void flush_tlb_fix_spurious_fault(struct vm_area_struct *vma, void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, unsigned long end, unsigned int stride_shift, - bool freed_tables) + bool freed_tables, bool strict) { struct flush_tlb_info *info; u64 new_tlb_gen; @@ -1000,7 +1000,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, * a local TLB flush is needed. Optimize this use-case by calling * flush_tlb_func_local() directly in this case. */ - if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) { + if (strict && cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) { flush_tlb_multi(mm_cpumask(mm), info); } else if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) { lockdep_assert_irqs_enabled(); -- 2.25.1