[RFC v2 3/6] mm, migrc: Skip TLB flushes at the CPUs that already have been done

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



TLB flushes can be skipped if the requested TLB flushes have already been
performed for any reason — they do not have to come from migration. This
can be tracked by keeping a timestamp (= migrc_gen) of when a flush is
requested and of when one is actually performed.

Signed-off-by: Byungchul Park <byungchul@xxxxxx>
---
 arch/x86/include/asm/tlbflush.h |  6 ++++
 arch/x86/mm/tlb.c               | 55 +++++++++++++++++++++++++++++++++
 mm/migrate.c                    | 10 ++++++
 mm/rmap.c                       |  1 +
 4 files changed, 72 insertions(+)

diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 752d72ea209b..da987c15049e 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -283,6 +283,12 @@ extern void arch_tlbbatch_clean(struct arch_tlbflush_unmap_batch *batch);
 extern void arch_tlbbatch_fold(struct arch_tlbflush_unmap_batch *bdst,
 			       struct arch_tlbflush_unmap_batch *bsrc);
 
+#ifdef CONFIG_MIGRC
+extern void arch_migrc_adj(struct arch_tlbflush_unmap_batch *batch, int gen);	/* drop CPUs already flushed past @gen from @batch */
+#else
+static inline void arch_migrc_adj(struct arch_tlbflush_unmap_batch *batch, int gen) {}
+#endif
+
 static inline bool pte_flags_need_flush(unsigned long oldflags,
 					unsigned long newflags,
 					bool ignore_access)
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 2dabf0f340fb..913cad013979 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -1210,9 +1210,48 @@ STATIC_NOPV void native_flush_tlb_local(void)
 	native_write_cr3(__native_read_cr3());
 }
 
+#ifdef CONFIG_MIGRC
+DEFINE_PER_CPU(int, migrc_done);	/* generation of the last local TLB flush on this CPU */
+
+static inline int migrc_tlb_local_begin(void)
+{
+	int ret = atomic_read(&migrc_gen);
+
+	/*
+	 * XXX: barrier() would be sufficient if the architecture
+	 * guarantees the order between memory access and TLB flush.
+	 */
+	smp_mb();
+	return ret;
+}
+
+static inline void migrc_tlb_local_end(int gen)
+{
+	/*
+	 * XXX: barrier() would be sufficient if the architecture
+	 * guarantees the order between TLB flush and memory access.
+	 */
+	smp_mb();
+	WRITE_ONCE(*this_cpu_ptr(&migrc_done), gen);
+}
+#else
+static inline int migrc_tlb_local_begin(void)
+{
+	return 0;	/* generation tracking is compiled out */
+}
+
+static inline void migrc_tlb_local_end(int gen)
+{
+}
+#endif
+
 void flush_tlb_local(void)
 {
+	unsigned int gen;	/* NOTE(review): migrc_tlb_local_begin() returns int — consider matching the types */
+
+	gen = migrc_tlb_local_begin();
 	__flush_tlb_local();
+	migrc_tlb_local_end(gen);
 }
 
 /*
@@ -1237,6 +1276,22 @@ void __flush_tlb_all(void)
 }
 EXPORT_SYMBOL_GPL(__flush_tlb_all);
 
+#ifdef CONFIG_MIGRC
+static inline bool before(int a, int b)
+{
+	return a - b < 0;	/* time_before()-style comparison, tolerant of counter wraparound */
+}
+
+void arch_migrc_adj(struct arch_tlbflush_unmap_batch *batch, int gen)
+{
+	int cpu;
+
+	for_each_cpu(cpu, &batch->cpumask)
+		if (!before(READ_ONCE(*per_cpu_ptr(&migrc_done, cpu)), gen))
+			cpumask_clear_cpu(cpu, &batch->cpumask);	/* this CPU already flushed at or after @gen */
+}
+#endif
+
 void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
 {
 	struct flush_tlb_info *info;
diff --git a/mm/migrate.c b/mm/migrate.c
index f9446f5b312a..c7b72d275b2a 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2053,6 +2053,16 @@ static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page,
 	stats->nr_thp_failed += thp_retry;
 	stats->nr_failed_pages += nr_retry_pages;
 move:
+	/*
+	 * This must come before try_to_unmap_flush() so that
+	 * migrc_try_flush_free_folios(), called later, can
+	 * benefit from the TLB flushes done in try_to_unmap_flush().
+	 *
+	 * migrc_req_end() stores the timestamp of the pending
+	 * request, and each TLB flush records its own timestamp,
+	 * so unnecessary TLB flushes can be skipped later by
+	 * comparing the two.
+	 */
 	if (migrc_cond1)
 		migrc_req_end();
 
diff --git a/mm/rmap.c b/mm/rmap.c
index 0652d25206ee..2ae1b1324f84 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -627,6 +627,7 @@ static bool __migrc_try_flush_free_folios(struct llist_head *h)
 	llist_for_each_entry_safe(req, req2, reqs, llnode) {
 		struct llist_node *n;
 
+		arch_migrc_adj(&req->arch, req->gen);
 		arch_tlbbatch_fold(&arch, &req->arch);
 
 		n = llist_del_all(&req->pages);
-- 
2.17.1





[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux