From: Nadav Amit <namit@xxxxxxxxxx> To detect deferred TLB flushes in fine granularity, we need to keep track of the completed TLB flush generation for each mm. Add logic to track for each mm the tlb_gen_completed, which tracks the completed TLB generation. It is the arch's responsibility to call mark_mm_tlb_gen_done() whenever a TLB flush is completed. Start the generation numbers from 1 instead of 0. This will later allow us to detect whether flushes of a certain generation were completed. Signed-off-by: Nadav Amit <namit@xxxxxxxxxx> Cc: Andrea Arcangeli <aarcange@xxxxxxxxxx> Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> Cc: Andy Lutomirski <luto@xxxxxxxxxx> Cc: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx> Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx> Cc: Will Deacon <will@xxxxxxxxxx> Cc: Yu Zhao <yuzhao@xxxxxxxxxx> Cc: Nick Piggin <npiggin@xxxxxxxxx> Cc: x86@xxxxxxxxxx --- arch/x86/mm/tlb.c | 10 ++++++++++ include/asm-generic/tlb.h | 33 +++++++++++++++++++++++++++++++++ include/linux/mm_types.h | 15 ++++++++++++++- 3 files changed, 57 insertions(+), 1 deletion(-) diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 7ab21430be41..d17b5575531e 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -14,6 +14,7 @@ #include <asm/nospec-branch.h> #include <asm/cache.h> #include <asm/apic.h> +#include <asm/tlb.h> #include "mm_internal.h" @@ -915,6 +916,9 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) flush_tlb_others(mm_cpumask(mm), info); + /* Update the completed generation */ + mark_mm_tlb_gen_done(mm, new_tlb_gen); + put_flush_tlb_info(); put_cpu(); } @@ -1147,6 +1151,12 @@ void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) cpumask_clear(&batch->cpumask); + /* + * We cannot call mark_mm_tlb_gen_done() since we do not know which + * mm's should be flushed. This may lead to some unwarranted TLB + * flushes, but not to correctness problems. 
+ */ + put_cpu(); } diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 517c89398c83..427bfcc6cdec 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -513,6 +513,39 @@ static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vm } #endif +#ifdef CONFIG_ARCH_HAS_TLB_GENERATIONS + +/* + * Helper function to update a generation to a new value, as long as the + * new value is greater than the current one. + */ +static inline void tlb_update_generation(atomic64_t *gen, u64 new_gen) +{ + u64 cur_gen = atomic64_read(gen); + + while (cur_gen < new_gen) { + u64 old_gen = atomic64_cmpxchg(gen, cur_gen, new_gen); + + /* Check if we succeeded in the cmpxchg */ + if (likely(cur_gen == old_gen)) + break; + + cur_gen = old_gen; + }; +} + + +static inline void mark_mm_tlb_gen_done(struct mm_struct *mm, u64 gen) +{ + /* + * Update the completed generation to the new generation if the new + * generation is greater than the previous one. + */ + tlb_update_generation(&mm->tlb_gen_completed, gen); +} + +#endif /* CONFIG_ARCH_HAS_TLB_GENERATIONS */ + /* * tlb_flush_{pte|pmd|pud|p4d}_range() adjust the tlb->start and tlb->end, * and set corresponding cleared_*. diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 2035ac319c2b..8a5eb4bfac59 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -571,6 +571,13 @@ struct mm_struct { * This is not used on Xen PV. */ atomic64_t tlb_gen; + + /* + * TLB generation which is guaranteed to be flushed, including + * all the PTE changes that were performed before tlb_gen was + * incremented. 
+ */ + atomic64_t tlb_gen_completed; #endif } __randomize_layout; @@ -690,7 +697,13 @@ static inline bool mm_tlb_flush_nested(struct mm_struct *mm) #ifdef CONFIG_ARCH_HAS_TLB_GENERATIONS static inline void init_mm_tlb_gen(struct mm_struct *mm) { - atomic64_set(&mm->tlb_gen, 0); + /* + * Start from a generation of 1, so the default generation 0 will be + * considered as flushed and will not be regarded as an outstanding + * deferred invalidation. + */ + atomic64_set(&mm->tlb_gen, 1); + atomic64_set(&mm->tlb_gen_completed, 1); } static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) -- 2.25.1