[PATCH 2/2] KVM: PPC: Book3S HV: Optimise TLB flushing when a vcpu moves between threads in a core

Nicholas Piggin <npiggin@xxxxxxxxx> · Mon, 18 Jan 2021 22:26:09 +1000

As explained in the comment, there is no need to flush TLBs on all
threads in a core when a vcpu moves between threads in the same core.

Thread migrations can be a significant proportion of vcpu migrations,
so this can help reduce the TLB flushing and IPI traffic.

Signed-off-by: Nicholas Piggin <npiggin@xxxxxxxxx>
---
I believe we can do this and have the TLB coherency correct as per
the architecture, but would appreciate someone else verifying my
thinking.

Thanks,
Nick

 arch/powerpc/kvm/book3s_hv.c | 28 ++++++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 752daf43f780..53d0cbfe5933 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2584,7 +2584,7 @@ static void kvmppc_release_hwthread(int cpu)
 	tpaca->kvm_hstate.kvm_split_mode = NULL;
 }
 
-static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
+static void radix_flush_cpu(struct kvm *kvm, int cpu, bool core, struct kvm_vcpu *vcpu)
 {
 	struct kvm_nested_guest *nested = vcpu->arch.nested;
 	cpumask_t *cpu_in_guest;
@@ -2599,6 +2599,14 @@ static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
 		cpu_in_guest = &kvm->arch.cpu_in_guest;
 	}
 
+	if (!core) {
+		cpumask_set_cpu(cpu, need_tlb_flush);
+		smp_mb();
+		if (cpumask_test_cpu(cpu, cpu_in_guest))
+			smp_call_function_single(cpu, do_nothing, NULL, 1);
+		return;
+	}
+
 	cpu = cpu_first_thread_sibling(cpu);
 	for (i = 0; i < threads_per_core; ++i)
 		cpumask_set_cpu(cpu + i, need_tlb_flush);
@@ -2655,7 +2663,23 @@ static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
 		if (prev_cpu < 0)
 			return; /* first run */
 
-		radix_flush_cpu(kvm, prev_cpu, vcpu);
+		/*
+		 * If changing cores, all threads on the old core should
+		 * flush, because TLBs can be shared between threads. More
+		 * precisely, the thread we previously ran on should be
+		 * flushed, and the thread to first run a vcpu on the old
+		 * core should flush, but we don't keep enough information
+		 * around to track that, so we flush all.
+		 *
+		 * If changing threads in the same core, only the old thread
+		 * need be flushed.
+		 */
+		if (cpu_first_thread_sibling(prev_cpu) !=
+		    cpu_first_thread_sibling(pcpu))
+			radix_flush_cpu(kvm, prev_cpu, true, vcpu);
+		else
+			radix_flush_cpu(kvm, prev_cpu, false, vcpu);
+
 	}
 }
 
-- 
2.23.0