Do not apply.  This code is not functional, and is probably a terrible
idea in the first place.

Not-signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
---
Tom, this is the idea I was talking about at PUCK for avoiding VMSAVE
when a vCPU is scheduled back in.  The idea is to:

 1) Do VMSAVE during KVM_RUN's vcpu_load().

 2) Snapshot the state that is per-task, and manually shove that state
    into the save area (via memory) during svm_prepare_switch_to_guest().

 3) For state that is (theoretically) common to all tasks, e.g. the
    SYSENTER entry point, rely on the initial VMSAVE.

 4) Refresh the snapshot during kvm_sched_in() if the vCPU is being
    migrated to a different pCPU, to account for per-CPU state, e.g.
    GS.base.

QEMU segfaults on an FS: access though, specifically if I omit the VMSAVE
in the sched_in path, so I'm missing something.

And this really was supposed to be for illustrative purposes only, but I
just couldn't resist seeing if it would work :-)
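To make the buckets in 1-4 concrete, here is a standalone, compile-checkable
sketch of how the hack carves up the VMSAVE'd state.  Everything in it (the
types, the stub struct, the helper) is a simplified stand-in rather than the
real kernel definitions, and the bucketing just mirrors what the diff below
snapshots; it is not an authoritative classification of that state.

/* Illustrative stand-ins only, not kernel code. */
#include <stdbool.h>
#include <stdint.h>

struct seg { uint16_t selector; uint64_t base; };

/* State that VMSAVE dumps into the host save area, bucketed by scope. */
struct host_save_area_sketch {
	/* Treated as per-task: snapshotted, rewritten before each VMRUN. */
	struct seg ldtr, tr, fs;
	uint64_t kernel_gs_base;
	/* Per-pCPU (e.g. GS.base): handled by redoing VMSAVE on migration. */
	struct seg gs;
	/* (Theoretically) global: covered by the initial VMSAVE alone. */
	uint64_t star, lstar, cstar, sfmask;
	uint64_t sysenter_cs, sysenter_esp, sysenter_eip;
};

/*
 * Mirrors the gating condition in svm_vcpu_load() below: VMSAVE is only
 * redone on the initial vcpu_load() for KVM_RUN, or when a sched_in lands
 * the task on a different pCPU; a same-pCPU sched_in reuses the snapshot.
 */
static bool need_vmsave(bool wants_to_run, bool sched_in, int prev_cpu, int cpu)
{
	return wants_to_run && (prev_cpu != cpu || !sched_in);
}

int main(void)
{
	/* sched_in on the pCPU the task left: the expensive VMSAVE is skipped. */
	return need_vmsave(true, true, 1, 1) ? 1 : 0;
}

The actual layout the diff relies on is the per-CPU save area page, with the
SEV-ES host save area living at offset 0x400 (hence the "(void *)sa + 0x400").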
 arch/x86/kvm/svm/svm.c | 34 ++++++++++++++++++++++++----------
 arch/x86/kvm/svm/svm.h |  5 +++++
 2 files changed, 29 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 6d9763dc4fed..348addf0d4cf 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1499,6 +1499,7 @@ static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 	struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu);
+	struct vmcb_save_area *sa = page_address(sd->save_area);
 
 	if (sev_es_guest(vcpu->kvm))
 		sev_es_unmap_ghcb(svm);
@@ -1506,17 +1507,13 @@ static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
 	if (svm->guest_state_loaded)
 		return;
 
-	/*
-	 * Save additional host state that will be restored on VMEXIT (sev-es)
-	 * or subsequent vmload of host save area.
-	 */
-	vmsave(sd->save_area_pa);
-	if (sev_es_guest(vcpu->kvm)) {
-		struct sev_es_save_area *hostsa;
-		hostsa = (struct sev_es_save_area *)(page_address(sd->save_area) + 0x400);
+	sa->ldtr = svm->saved_ldtr;
+	sa->tr = svm->saved_tr;
+	sa->fs = svm->saved_fs;
+	sa->kernel_gs_base = svm->saved_kernel_gs_base;
 
-		sev_es_prepare_switch_to_guest(svm, hostsa);
-	}
+	if (sev_es_guest(vcpu->kvm))
+		sev_es_prepare_switch_to_guest(svm, (void *)sa + 0x400);
 
 	if (tsc_scaling)
 		__svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
@@ -1543,6 +1540,7 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu, bool sched_in)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 	struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu);
+	struct vmcb_save_area *sa = page_address(sd->save_area);
 
 	if (sched_in && !kvm_pause_in_guest(vcpu->kvm))
 		shrink_ple_window(vcpu);
@@ -1555,6 +1553,22 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu, bool sched_in)
 	}
 	if (kvm_vcpu_apicv_active(vcpu))
 		avic_vcpu_load(vcpu, cpu);
+
+	/*
+	 * If the vCPU is being loaded for KVM_RUN, save additional host state
+	 * that will be restored on VMEXIT (sev-es) or subsequent vmload of
+	 * host save area.  No need to re-save state if the vCPU task was
+	 * scheduled out from within KVM_RUN and is being scheduled back in on
+	 * the same pCPU.
+	 */
+	if (vcpu->wants_to_run && (vcpu->cpu != cpu || !sched_in)) {
+		vmsave(sd->save_area_pa);
+
+		svm->saved_ldtr = sa->ldtr;
+		svm->saved_tr = sa->tr;
+		svm->saved_fs = sa->fs;
+		svm->saved_kernel_gs_base = sa->kernel_gs_base;
+	}
 }
 
 static void svm_vcpu_put(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 323901782547..d0b93a69453a 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -219,6 +219,11 @@ struct vcpu_svm {
 	u32 sysenter_eip_hi;
 	uint64_t tsc_aux;
 
+	struct vmcb_seg saved_ldtr;
+	struct vmcb_seg saved_tr;
+	struct vmcb_seg saved_fs;
+	u64 saved_kernel_gs_base;
+
 	u64 msr_decfg;
 
 	u64 next_rip;

base-commit: 662c1834094920e74ef48f6d52eba112094ae730
-- 
2.45.0.rc0.197.gbae5840b3b-goog