On 25/11/24 16:12, Nikolay Borisov wrote: > > > On 21.11.24 г. 22:14 ч., Adrian Hunter wrote: >> From: Isaku Yamahata <isaku.yamahata@xxxxxxxxx> >> >> On entering/exiting TDX vcpu, preserved or clobbered CPU state is different >> from the VMX case. Add TDX hooks to save/restore host/guest CPU state. >> Save/restore kernel GS base MSR. >> >> Signed-off-by: Isaku Yamahata <isaku.yamahata@xxxxxxxxx> >> Signed-off-by: Adrian Hunter <adrian.hunter@xxxxxxxxx> >> Reviewed-by: Paolo Bonzini <pbonzini@xxxxxxxxxx> >> --- >> TD vcpu enter/exit v1: >> - Clarify comment (Binbin) >> - Use lower case preserved and add the for VMX in log (Tony) >> - Fix bisectability issue with includes (Kai) >> --- >> arch/x86/kvm/vmx/main.c | 24 ++++++++++++++++++-- >> arch/x86/kvm/vmx/tdx.c | 46 ++++++++++++++++++++++++++++++++++++++ >> arch/x86/kvm/vmx/tdx.h | 4 ++++ >> arch/x86/kvm/vmx/x86_ops.h | 4 ++++ >> 4 files changed, 76 insertions(+), 2 deletions(-) >> >> diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c >> index 44ec6005a448..3a8ffc199be2 100644 >> --- a/arch/x86/kvm/vmx/main.c >> +++ b/arch/x86/kvm/vmx/main.c >> @@ -129,6 +129,26 @@ static void vt_vcpu_load(struct kvm_vcpu *vcpu, int cpu) >> vmx_vcpu_load(vcpu, cpu); >> } >> +static void vt_prepare_switch_to_guest(struct kvm_vcpu *vcpu) >> +{ >> + if (is_td_vcpu(vcpu)) { >> + tdx_prepare_switch_to_guest(vcpu); >> + return; >> + } >> + >> + vmx_prepare_switch_to_guest(vcpu); >> +} >> + >> +static void vt_vcpu_put(struct kvm_vcpu *vcpu) >> +{ >> + if (is_td_vcpu(vcpu)) { >> + tdx_vcpu_put(vcpu); >> + return; >> + } >> + >> + vmx_vcpu_put(vcpu); >> +} >> + >> static int vt_vcpu_pre_run(struct kvm_vcpu *vcpu) >> { >> if (is_td_vcpu(vcpu)) >> @@ -250,9 +270,9 @@ struct kvm_x86_ops vt_x86_ops __initdata = { >> .vcpu_free = vt_vcpu_free, >> .vcpu_reset = vt_vcpu_reset, >> - .prepare_switch_to_guest = vmx_prepare_switch_to_guest, >> + .prepare_switch_to_guest = vt_prepare_switch_to_guest, >> .vcpu_load = vt_vcpu_load, >> - 
.vcpu_put = vmx_vcpu_put, >> + .vcpu_put = vt_vcpu_put, >> .update_exception_bitmap = vmx_update_exception_bitmap, >> .get_feature_msr = vmx_get_feature_msr, >> diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c >> index 5fa5b65b9588..6e4ea2d420bc 100644 >> --- a/arch/x86/kvm/vmx/tdx.c >> +++ b/arch/x86/kvm/vmx/tdx.c >> @@ -1,6 +1,7 @@ >> // SPDX-License-Identifier: GPL-2.0 >> #include <linux/cleanup.h> >> #include <linux/cpu.h> >> +#include <linux/mmu_context.h> >> #include <asm/tdx.h> >> #include "capabilities.h" >> #include "mmu.h" >> @@ -9,6 +10,7 @@ >> #include "vmx.h" >> #include "mmu/spte.h" >> #include "common.h" >> +#include "posted_intr.h" >> #include <trace/events/kvm.h> >> #include "trace.h" >> @@ -605,6 +607,9 @@ int tdx_vcpu_create(struct kvm_vcpu *vcpu) >> if ((kvm_tdx->xfam & XFEATURE_MASK_XTILE) == XFEATURE_MASK_XTILE) >> vcpu->arch.xfd_no_write_intercept = true; >> + tdx->host_state_need_save = true; >> + tdx->host_state_need_restore = false; > > nit: Rather than have 2 separate values which actually work in tandem, why not define a u8 or even u32 and have a mask of the valid flags. > > So you can have something like: > > #define SAVE_HOST BIT(0) > #define RESTORE_HOST BIT(1) > > tdx->state_flags = SAVE_HOST > > I don't know what are the plans for the future but there might be cases where you can have more complex flags composed of more simple ones. > There are really only 3 possibilities: initial state (or after tdx_prepare_switch_to_host()) tdx->host_state_need_save = true; tdx->host_state_need_restore = false; After save (i.e. after tdx_prepare_switch_to_guest()) tdx->host_state_need_save = false tdx->host_state_need_restore = false; After enter/exit (i.e. 
after tdx_vcpu_enter_exit()) tdx->host_state_need_save = false tdx->host_state_need_restore = true; I can't think of good names, perhaps: enum tdx_prepare_switch_state { TDX_PREP_UNSAVED, TDX_PREP_SAVED, TDX_PREP_UNRESTORED, }; >> tdx->state = VCPU_TD_STATE_UNINITIALIZED; >> return 0; >> @@ -631,6 +636,45 @@ void tdx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) >> local_irq_enable(); >> } >> +/* >> + * Compared to vmx_prepare_switch_to_guest(), there is not much to do >> + * as SEAMCALL/SEAMRET calls take care of most of save and restore. >> + */ >> +void tdx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) >> +{ >> + struct vcpu_tdx *tdx = to_tdx(vcpu); >> + >> + if (!tdx->host_state_need_save) > if (!(tdx->state_flags & SAVE_HOST)) if (tdx->prep_switch_state != TDX_PREP_UNSAVED) >> + return; >> + >> + if (likely(is_64bit_mm(current->mm))) >> + tdx->msr_host_kernel_gs_base = current->thread.gsbase; >> + else >> + tdx->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE); >> + >> + tdx->host_state_need_save = false; > > tdx->state_flags &= ~SAVE_HOST tdx->prep_switch_state = TDX_PREP_SAVED; >> +} >> + >> +static void tdx_prepare_switch_to_host(struct kvm_vcpu *vcpu) >> +{ >> + struct vcpu_tdx *tdx = to_tdx(vcpu); >> + >> + tdx->host_state_need_save = true; >> + if (!tdx->host_state_need_restore) > if (!(tdx->state_flags & RESTORE_HOST)) if (tdx->prep_switch_state != TDX_PREP_UNRESTORED) > >> + return; >> + >> + ++vcpu->stat.host_state_reload; >> + >> + wrmsrl(MSR_KERNEL_GS_BASE, tdx->msr_host_kernel_gs_base); >> + tdx->host_state_need_restore = false; tdx->prep_switch_state = TDX_PREP_UNSAVED; >> +} >> + >> +void tdx_vcpu_put(struct kvm_vcpu *vcpu) >> +{ >> + vmx_vcpu_pi_put(vcpu); >> + tdx_prepare_switch_to_host(vcpu); >> +} >> + >> void tdx_vcpu_free(struct kvm_vcpu *vcpu) >> { >> struct kvm_tdx *kvm_tdx = to_kvm_tdx(vcpu->kvm); >> @@ -732,6 +776,8 @@ fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit) >> tdx_vcpu_enter_exit(vcpu); >> +
tdx->host_state_need_restore = true; > > tdx->state_flags |= RESTORE_HOST tdx->prep_switch_state = TDX_PREP_UNRESTORED; > >> + >> vcpu->arch.regs_avail &= ~VMX_REGS_LAZY_LOAD_SET; >> trace_kvm_exit(vcpu, KVM_ISA_VMX); >> diff --git a/arch/x86/kvm/vmx/tdx.h b/arch/x86/kvm/vmx/tdx.h >> index ebee1049b08b..48cf0a1abfcc 100644 >> --- a/arch/x86/kvm/vmx/tdx.h >> +++ b/arch/x86/kvm/vmx/tdx.h >> @@ -54,6 +54,10 @@ struct vcpu_tdx { >> u64 vp_enter_ret; >> enum vcpu_tdx_state state; >> + >> + bool host_state_need_save; >> + bool host_state_need_restore; > > this would save having a discrete member for those boolean checks. > >> + u64 msr_host_kernel_gs_base; >> }; >> void tdh_vp_rd_failed(struct vcpu_tdx *tdx, char *uclass, u32 field, u64 err); >> diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h >> index 3d292a677b92..5bd45a720007 100644 >> --- a/arch/x86/kvm/vmx/x86_ops.h >> +++ b/arch/x86/kvm/vmx/x86_ops.h >> @@ -130,6 +130,8 @@ int tdx_vcpu_create(struct kvm_vcpu *vcpu); >> void tdx_vcpu_free(struct kvm_vcpu *vcpu); >> void tdx_vcpu_load(struct kvm_vcpu *vcpu, int cpu); >> fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit); >> +void tdx_prepare_switch_to_guest(struct kvm_vcpu *vcpu); >> +void tdx_vcpu_put(struct kvm_vcpu *vcpu); >> int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp); >> @@ -161,6 +163,8 @@ static inline fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediat >> { >> return EXIT_FASTPATH_NONE; >> } >> +static inline void tdx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) {} >> +static inline void tdx_vcpu_put(struct kvm_vcpu *vcpu) {} >> static inline int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp) { return -EOPNOTSUPP; } >> >