TLB flushes can be skipped if the requested TLB flushes have already
been performed for any reason, not necessarily by migration. This can
be tracked by recording a timestamp (= migrc_gen) both when a flush is
requested and when one is triggered.

Signed-off-by: Byungchul Park <byungchul@xxxxxx>
---
 arch/x86/include/asm/tlbflush.h |  6 ++++
 arch/x86/mm/tlb.c               | 55 +++++++++++++++++++++++++++++++++
 mm/migrate.c                    | 10 ++++++
 mm/rmap.c                       |  1 +
 4 files changed, 72 insertions(+)

diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 752d72ea209b..da987c15049e 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -283,6 +283,12 @@ extern void arch_tlbbatch_clean(struct arch_tlbflush_unmap_batch *batch);
 extern void arch_tlbbatch_fold(struct arch_tlbflush_unmap_batch *bdst,
 				struct arch_tlbflush_unmap_batch *bsrc);
 
+#ifdef CONFIG_MIGRC
+extern void arch_migrc_adj(struct arch_tlbflush_unmap_batch *batch, int gen);
+#else
+static inline void arch_migrc_adj(struct arch_tlbflush_unmap_batch *batch, int gen) {}
+#endif
+
 static inline bool pte_flags_need_flush(unsigned long oldflags,
 					unsigned long newflags,
 					bool ignore_access)
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 2dabf0f340fb..913cad013979 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -1210,9 +1210,48 @@ STATIC_NOPV void native_flush_tlb_local(void)
 	native_write_cr3(__native_read_cr3());
 }
 
+#ifdef CONFIG_MIGRC
+DEFINE_PER_CPU(int, migrc_done);
+
+static inline int migrc_tlb_local_begin(void)
+{
+	int ret = atomic_read(&migrc_gen);
+
+	/*
+	 * XXX: barrier() would be sufficient if the architecture
+	 * guarantees the order between memory access and TLB flush.
+	 */
+	smp_mb();
+	return ret;
+}
+
+static inline void migrc_tlb_local_end(int gen)
+{
+	/*
+	 * XXX: barrier() would be sufficient if the architecture
+	 * guarantees the order between TLB flush and memory access.
+	 */
+	smp_mb();
+	WRITE_ONCE(*this_cpu_ptr(&migrc_done), gen);
+}
+#else
+static inline int migrc_tlb_local_begin(void)
+{
+	return 0;
+}
+
+static inline void migrc_tlb_local_end(int gen)
+{
+}
+#endif
+
 void flush_tlb_local(void)
 {
+	unsigned int gen;
+
+	gen = migrc_tlb_local_begin();
 	__flush_tlb_local();
+	migrc_tlb_local_end(gen);
 }
 
 /*
@@ -1237,6 +1276,22 @@ void __flush_tlb_all(void)
 }
 EXPORT_SYMBOL_GPL(__flush_tlb_all);
 
+#ifdef CONFIG_MIGRC
+static inline bool before(int a, int b)
+{
+	return a - b < 0;
+}
+
+void arch_migrc_adj(struct arch_tlbflush_unmap_batch *batch, int gen)
+{
+	int cpu;
+
+	for_each_cpu(cpu, &batch->cpumask)
+		if (!before(READ_ONCE(*per_cpu_ptr(&migrc_done, cpu)), gen))
+			cpumask_clear_cpu(cpu, &batch->cpumask);
+}
+#endif
+
 void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
 {
 	struct flush_tlb_info *info;
diff --git a/mm/migrate.c b/mm/migrate.c
index f9446f5b312a..c7b72d275b2a 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2053,6 +2053,16 @@ static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page,
 	stats->nr_thp_failed += thp_retry;
 	stats->nr_failed_pages += nr_retry_pages;
 move:
+	/*
+	 * This should come before try_to_unmap_flush() so that
+	 * migrc_try_flush_free_folios(), which will be called later,
+	 * can benefit from the TLB flushes in try_to_unmap_flush().
+	 *
+	 * migrc_req_end() records the timestamp of the pending request,
+	 * and each TLB flush records the timestamp of the flush, so
+	 * that unnecessary TLB flushes can be skipped based on this
+	 * timing information.
+	 */
 	if (migrc_cond1)
 		migrc_req_end();
 
diff --git a/mm/rmap.c b/mm/rmap.c
index 0652d25206ee..2ae1b1324f84 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -627,6 +627,7 @@ static bool __migrc_try_flush_free_folios(struct llist_head *h)
 	llist_for_each_entry_safe(req, req2, reqs, llnode) {
 		struct llist_node *n;
 
+		arch_migrc_adj(&req->arch, req->gen);
 		arch_tlbbatch_fold(&arch, &req->arch);
 
 		n = llist_del_all(&req->pages);
-- 
2.17.1
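
A minimal user-space sketch (not part of the patch) of the wraparound-safe
generation comparison the series relies on: a CPU whose last recorded flush
generation (migrc_done) is not before the requested generation (gen) has
already covered the request, so arch_migrc_adj() can drop it from the
cpumask. The helper name cpu_flush_can_be_skipped() is hypothetical and only
for illustration.

#include <stdbool.h>
#include <stdio.h>

/* Same signed-difference trick as the patch's before(): safe across wraparound. */
static bool before(int a, int b)
{
	return a - b < 0;
}

/* Hypothetical helper: would another TLB flush on this CPU be redundant? */
static bool cpu_flush_can_be_skipped(int migrc_done_on_cpu, int requested_gen)
{
	return !before(migrc_done_on_cpu, requested_gen);
}

int main(void)
{
	/* CPU already flushed at generation 7; request was stamped at 5: skip. */
	printf("%d\n", cpu_flush_can_be_skipped(7, 5));	/* prints 1 */
	/* CPU last flushed at generation 4; request stamped at 5: still flush. */
	printf("%d\n", cpu_flush_can_be_skipped(4, 5));	/* prints 0 */
	return 0;
}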