[RFC PATCH v2 19/20] context_tracking,x86: Add infrastructure to defer kernel TLBI

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Kernel TLB invalidation IPIs are a common source of interference on
NOHZ_FULL CPUs. Given NOHZ_FULL CPUs executing in userspace are not
accessing any kernel addresses, these invalidations do not need to happen
immediately, and can be deferred until the next user->kernel transition.

Rather than make __flush_tlb_all() noinstr, add a minimal noinstr
variant that doesn't try to leverage INVPCID.

FIXME: not fully noinstr compliant
XXX: same issue as with ins patching, when do we access data that should be
invalidated?

Signed-off-by: Valentin Schneider <vschneid@xxxxxxxxxx>
---
 arch/x86/include/asm/context_tracking_work.h |  4 ++++
 arch/x86/include/asm/tlbflush.h              |  1 +
 arch/x86/mm/tlb.c                            | 17 +++++++++++++++++
 include/linux/context_tracking_state.h       |  4 ++++
 include/linux/context_tracking_work.h        |  2 ++
 5 files changed, 28 insertions(+)

diff --git a/arch/x86/include/asm/context_tracking_work.h b/arch/x86/include/asm/context_tracking_work.h
index 2c66687ce00e2..9d4f021b5a45b 100644
--- a/arch/x86/include/asm/context_tracking_work.h
+++ b/arch/x86/include/asm/context_tracking_work.h
@@ -3,6 +3,7 @@
 #define _ASM_X86_CONTEXT_TRACKING_WORK_H
 
 #include <asm/sync_core.h>
+#include <asm/tlbflush.h>
 
 static __always_inline void arch_context_tracking_work(int work)
 {
@@ -10,6 +11,9 @@ static __always_inline void arch_context_tracking_work(int work)
 	case CONTEXT_WORK_SYNC:
 		sync_core();
 		break;
+	case CONTEXT_WORK_TLBI:
+		__flush_tlb_all_noinstr();
+		break;
 	}
 }
 
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 80450e1d5385a..323b971987af7 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -17,6 +17,7 @@
 DECLARE_PER_CPU(u64, tlbstate_untag_mask);
 
 void __flush_tlb_all(void);
+void noinstr __flush_tlb_all_noinstr(void);
 
 #define TLB_FLUSH_ALL	-1UL
 #define TLB_GENERATION_INVALID	0
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 267acf27480af..631df9189ded4 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -1237,6 +1237,23 @@ void __flush_tlb_all(void)
 }
 EXPORT_SYMBOL_GPL(__flush_tlb_all);
 
+void noinstr __flush_tlb_all_noinstr(void)
+{
+	/*
+	 * This is for invocation in early entry code that cannot be
+	 * instrumented. A RMW to CR4 works for most cases, but relies on
+	 * being able to flip either of the PGE or PCIDE bits. Flipping CR4.PCID
+	 * would require also resetting CR3.PCID, so just try with CR4.PGE, else
+	 * do the CR3 write.
+	 *
+	 * TODO: paravirt
+	 */
+	if (cpu_feature_enabled(X86_FEATURE_PGE))
+		__native_tlb_flush_global(this_cpu_read(cpu_tlbstate.cr4));
+	else
+		flush_tlb_local();
+}
+
 void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
 {
 	struct flush_tlb_info *info;
diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h
index 292a0b7c06948..3571c62cbb9cd 100644
--- a/include/linux/context_tracking_state.h
+++ b/include/linux/context_tracking_state.h
@@ -62,6 +62,10 @@ enum ctx_state {
 #define RCU_DYNTICKS_END   (CT_STATE_SIZE - 1)
 #define RCU_DYNTICKS_IDX   BIT(RCU_DYNTICKS_START)
 
+/*
+ * When CONFIG_CONTEXT_TRACKING_WORK=n, _END is 1 behind _START, which makes
+ * the CONTEXT_WORK size computation below 0, which is what we want!
+ */
 #define	CONTEXT_WORK_START (CONTEXT_STATE_END + 1)
 #define CONTEXT_WORK_END   (RCU_DYNTICKS_START - 1)
 
diff --git a/include/linux/context_tracking_work.h b/include/linux/context_tracking_work.h
index 13fc97b395030..47d5ced39a43a 100644
--- a/include/linux/context_tracking_work.h
+++ b/include/linux/context_tracking_work.h
@@ -6,11 +6,13 @@
 
 enum {
 	CONTEXT_WORK_SYNC_OFFSET,
+	CONTEXT_WORK_TLBI_OFFSET,
 	CONTEXT_WORK_MAX_OFFSET
 };
 
 enum ct_work {
 	CONTEXT_WORK_SYNC     = BIT(CONTEXT_WORK_SYNC_OFFSET),
+	CONTEXT_WORK_TLBI     = BIT(CONTEXT_WORK_TLBI_OFFSET),
 	CONTEXT_WORK_MAX      = BIT(CONTEXT_WORK_MAX_OFFSET)
 };
 
-- 
2.31.1





[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux