Re: [PATCH v19 079/130] KVM: TDX: vcpu_run: save/restore host state(host kernel gs)

Binbin Wu <binbin.wu@xxxxxxxxxxxxxxx> · Sun, 7 Apr 2024 11:02:52 +0800

On 2/26/2024 4:26 PM, isaku.yamahata@xxxxxxxxx wrote:
From: Isaku Yamahata <isaku.yamahata@xxxxxxxxx>

On entering/exiting TDX vcpu, Preserved or clobbered CPU state is different
from VMX case.

Could you add more descriptions about the differences?

Add TDX hooks to save/restore host/guest CPU state.

KVM doesn't save/restore guest CPU state for TDX.

Save/restore kernel GS base MSR.

Signed-off-by: Isaku Yamahata <isaku.yamahata@xxxxxxxxx>
Reviewed-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
---
  arch/x86/kvm/vmx/main.c    | 30 +++++++++++++++++++++++++--
  arch/x86/kvm/vmx/tdx.c     | 42 ++++++++++++++++++++++++++++++++++++++
  arch/x86/kvm/vmx/tdx.h     |  4 ++++
  arch/x86/kvm/vmx/x86_ops.h |  4 ++++
  4 files changed, 78 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index d72651ce99ac..8275a242ce07 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -158,6 +158,32 @@ static void vt_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
  	vmx_vcpu_reset(vcpu, init_event);
  }
  
+static void vt_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * All host state is saved/restored across SEAMCALL/SEAMRET,

It sounds confusing to me.
If all host states are saved/restored across SEAMCALL/SEAMRET, why this 
patch saves/restores MSR_KERNEL_GS_BASE for host?

  and the
+	 * guest state of a TD is obviously off limits.  Deferring MSRs and DRs
+	 * is pointless because the TDX module needs to load *something* so as
+	 * not to expose guest state.
+	 */
+	if (is_td_vcpu(vcpu)) {
+		tdx_prepare_switch_to_guest(vcpu);
+		return;
+	}
+
+	vmx_prepare_switch_to_guest(vcpu);
+}
+
+static void vt_vcpu_put(struct kvm_vcpu *vcpu)
+{
+	if (is_td_vcpu(vcpu)) {
+		tdx_vcpu_put(vcpu);
+		return;
+	}
+
+	vmx_vcpu_put(vcpu);
+}
+
  static int vt_vcpu_pre_run(struct kvm_vcpu *vcpu)
  {
  	if (is_td_vcpu(vcpu))
@@ -326,9 +352,9 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
  	.vcpu_free = vt_vcpu_free,
  	.vcpu_reset = vt_vcpu_reset,
  
-	.prepare_switch_to_guest = vmx_prepare_switch_to_guest,
+	.prepare_switch_to_guest = vt_prepare_switch_to_guest,
  	.vcpu_load = vmx_vcpu_load,
-	.vcpu_put = vmx_vcpu_put,
+	.vcpu_put = vt_vcpu_put,
  
  	.update_exception_bitmap = vmx_update_exception_bitmap,
  	.get_msr_feature = vmx_get_msr_feature,
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index fdf9196cb592..9616b1aab6ce 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -1,5 +1,6 @@
  // SPDX-License-Identifier: GPL-2.0
  #include <linux/cpu.h>
+#include <linux/mmu_context.h>
  
  #include <asm/tdx.h>
  
@@ -423,6 +424,7 @@ u8 tdx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
  int tdx_vcpu_create(struct kvm_vcpu *vcpu)
  {
  	struct kvm_tdx *kvm_tdx = to_kvm_tdx(vcpu->kvm);
+	struct vcpu_tdx *tdx = to_tdx(vcpu);
  
  	WARN_ON_ONCE(vcpu->arch.cpuid_entries);
  	WARN_ON_ONCE(vcpu->arch.cpuid_nent);
@@ -446,9 +448,47 @@ int tdx_vcpu_create(struct kvm_vcpu *vcpu)
  	if ((kvm_tdx->xfam & XFEATURE_MASK_XTILE) == XFEATURE_MASK_XTILE)
  		vcpu->arch.xfd_no_write_intercept = true;
  
+	tdx->host_state_need_save = true;
+	tdx->host_state_need_restore = false;
+
  	return 0;
  }
  
+void tdx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)

Just like vmx_prepare_switch_to_host(), the input can be "struct 
vcpu_tdx *", since vcpu is not used inside the function.
And the callsites just use "to_tdx(vcpu)"

+{
+	struct vcpu_tdx *tdx = to_tdx(vcpu);
Then, this can be dropped.

+
+	if (!tdx->host_state_need_save)
+		return;
+
+	if (likely(is_64bit_mm(current->mm)))
+		tdx->msr_host_kernel_gs_base = current->thread.gsbase;
+	else
+		tdx->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE);
+
+	tdx->host_state_need_save = false;
+}
+
+static void tdx_prepare_switch_to_host(struct kvm_vcpu *vcpu)

ditto

+{
+	struct vcpu_tdx *tdx = to_tdx(vcpu);
+
+	tdx->host_state_need_save = true;
+	if (!tdx->host_state_need_restore)
+		return;
+
+	++vcpu->stat.host_state_reload;
+
+	wrmsrl(MSR_KERNEL_GS_BASE, tdx->msr_host_kernel_gs_base);
+	tdx->host_state_need_restore = false;
+}
+
+void tdx_vcpu_put(struct kvm_vcpu *vcpu)
+{
+	vmx_vcpu_pi_put(vcpu);
+	tdx_prepare_switch_to_host(vcpu);
+}
+
  void tdx_vcpu_free(struct kvm_vcpu *vcpu)
  {
  	struct vcpu_tdx *tdx = to_tdx(vcpu);
@@ -569,6 +609,8 @@ fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu)
  
  	tdx_vcpu_enter_exit(tdx);
  
+	tdx->host_state_need_restore = true;
+
  	vcpu->arch.regs_avail &= ~VMX_REGS_LAZY_LOAD_SET;
  	trace_kvm_exit(vcpu, KVM_ISA_VMX);
  
diff --git a/arch/x86/kvm/vmx/tdx.h b/arch/x86/kvm/vmx/tdx.h
index 81d301fbe638..e96c416e73bf 100644
--- a/arch/x86/kvm/vmx/tdx.h
+++ b/arch/x86/kvm/vmx/tdx.h
@@ -69,6 +69,10 @@ struct vcpu_tdx {
  
  	bool initialized;
  
+	bool host_state_need_save;
+	bool host_state_need_restore;
+	u64 msr_host_kernel_gs_base;
+
  	/*
  	 * Dummy to make pmu_intel not corrupt memory.
  	 * TODO: Support PMU for TDX.  Future work.
diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
index 3e29a6fe28ef..9fd997c79c33 100644
--- a/arch/x86/kvm/vmx/x86_ops.h
+++ b/arch/x86/kvm/vmx/x86_ops.h
@@ -151,6 +151,8 @@ int tdx_vcpu_create(struct kvm_vcpu *vcpu);
  void tdx_vcpu_free(struct kvm_vcpu *vcpu);
  void tdx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
  fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu);
+void tdx_prepare_switch_to_guest(struct kvm_vcpu *vcpu);
+void tdx_vcpu_put(struct kvm_vcpu *vcpu);
  u8 tdx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
  
  int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp);
@@ -186,6 +188,8 @@ static inline int tdx_vcpu_create(struct kvm_vcpu *vcpu) { return -EOPNOTSUPP; }
  static inline void tdx_vcpu_free(struct kvm_vcpu *vcpu) {}
  static inline void tdx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) {}
  static inline fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu) { return EXIT_FASTPATH_NONE; }
+static inline void tdx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) {}
+static inline void tdx_vcpu_put(struct kvm_vcpu *vcpu) {}
  static inline u8 tdx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) { return 0; }
  
  static inline int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp) { return -EOPNOTSUPP; }