[PATCH 1/2] KVM: PPC: Book3S HV: Remove shared-TLB optimisation from vCPU TLB coherency logic

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Processors that implement ISA v3.0 or later don't necessarily have
threads in a core sharing all translations, and/or TLBIEL does not
necessarily invalidate translations on all other threads (the
architecture talks only about the effect on translations for "the thread
executing the tlbiel instruction".

While this worked for POWER9, it may not for future implementations, so
remove it. A POWER9 specific optimisation would have to have a specific
CPU feature to check, if it were to be re-added.

Signed-off-by: Nicholas Piggin <npiggin@xxxxxxxxx>
---
 arch/powerpc/kvm/book3s_hv.c         | 32 +++++++++++++++++++---------
 arch/powerpc/kvm/book3s_hv_builtin.c |  9 --------
 arch/powerpc/kvm/book3s_hv_rm_mmu.c  |  6 ------
 3 files changed, 22 insertions(+), 25 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 2d8627dbd9f6..752daf43f780 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2588,22 +2588,34 @@ static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
 {
 	struct kvm_nested_guest *nested = vcpu->arch.nested;
 	cpumask_t *cpu_in_guest;
+	cpumask_t *need_tlb_flush;
 	int i;
 
-	cpu = cpu_first_thread_sibling(cpu);
 	if (nested) {
-		cpumask_set_cpu(cpu, &nested->need_tlb_flush);
+		need_tlb_flush = &nested->need_tlb_flush;
 		cpu_in_guest = &nested->cpu_in_guest;
 	} else {
-		cpumask_set_cpu(cpu, &kvm->arch.need_tlb_flush);
+		need_tlb_flush = &kvm->arch.need_tlb_flush;
 		cpu_in_guest = &kvm->arch.cpu_in_guest;
 	}
+
+	cpu = cpu_first_thread_sibling(cpu);
+	for (i = 0; i < threads_per_core; ++i)
+		cpumask_set_cpu(cpu + i, need_tlb_flush);
+
 	/*
-	 * Make sure setting of bit in need_tlb_flush precedes
+	 * Make sure setting of bits in need_tlb_flush precedes
 	 * testing of cpu_in_guest bits.  The matching barrier on
 	 * the other side is the first smp_mb() in kvmppc_run_core().
 	 */
 	smp_mb();
+
+	/*
+	 * Pull vcpus out of guests if necessary, such that they'll notice
+	 * the need_tlb_flush bit when they re-enter the guest. If this was
+	 * ever a performance concern, it would be interesting to compare
+	 * with performance of using TLBIE.
+	 */
 	for (i = 0; i < threads_per_core; ++i)
 		if (cpumask_test_cpu(cpu + i, cpu_in_guest))
 			smp_call_function_single(cpu + i, do_nothing, NULL, 1);
@@ -2632,18 +2644,18 @@ static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
 	 * can move around between pcpus.  To cope with this, when
 	 * a vcpu moves from one pcpu to another, we need to tell
 	 * any vcpus running on the same core as this vcpu previously
-	 * ran to flush the TLB.  The TLB is shared between threads,
-	 * so we use a single bit in .need_tlb_flush for all 4 threads.
+	 * ran to flush the TLB.
 	 */
 	if (prev_cpu != pcpu) {
-		if (prev_cpu >= 0 &&
-		    cpu_first_thread_sibling(prev_cpu) !=
-		    cpu_first_thread_sibling(pcpu))
-			radix_flush_cpu(kvm, prev_cpu, vcpu);
 		if (nested)
 			nested->prev_cpu[vcpu->arch.nested_vcpu_id] = pcpu;
 		else
 			vcpu->arch.prev_cpu = pcpu;
+
+		if (prev_cpu < 0)
+			return; /* first run */
+
+		radix_flush_cpu(kvm, prev_cpu, vcpu);
 	}
 }
 
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index f3d3183249fe..dad118760a4e 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -789,15 +789,6 @@ void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu,
 {
 	cpumask_t *need_tlb_flush;
 
-	/*
-	 * On POWER9, individual threads can come in here, but the
-	 * TLB is shared between the 4 threads in a core, hence
-	 * invalidating on one thread invalidates for all.
-	 * Thus we make all 4 threads use the same bit.
-	 */
-	if (cpu_has_feature(CPU_FTR_ARCH_300))
-		pcpu = cpu_first_thread_sibling(pcpu);
-
 	if (nested)
 		need_tlb_flush = &nested->need_tlb_flush;
 	else
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 88da2764c1bb..f87237927096 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -62,12 +62,6 @@ static int global_invalidates(struct kvm *kvm)
 		smp_wmb();
 		cpumask_setall(&kvm->arch.need_tlb_flush);
 		cpu = local_paca->kvm_hstate.kvm_vcore->pcpu;
-		/*
-		 * On POWER9, threads are independent but the TLB is shared,
-		 * so use the bit for the first thread to represent the core.
-		 */
-		if (cpu_has_feature(CPU_FTR_ARCH_300))
-			cpu = cpu_first_thread_sibling(cpu);
 		cpumask_clear_cpu(cpu, &kvm->arch.need_tlb_flush);
 	}
 
-- 
2.23.0




[Index of Archives]     [KVM Development]     [KVM ARM]     [KVM ia64]     [Linux Virtualization]     [Linux USB Devel]     [Linux Video]     [Linux Audio Users]     [Linux Kernel]     [Linux SCSI]     [Big List of Linux Books]

  Powered by Linux