From: Isaku Yamahata <isaku.yamahata@xxxxxxxxx> Wire up TDX PV HLT hypercall to the KVM backend function. Signed-off-by: Isaku Yamahata <isaku.yamahata@xxxxxxxxx> --- arch/x86/kvm/vmx/tdx.c | 42 +++++++++++++++++++++++++++++++++++++++++- arch/x86/kvm/vmx/tdx.h | 3 +++ 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c index 3a1fe74b95c3..4e48989d364f 100644 --- a/arch/x86/kvm/vmx/tdx.c +++ b/arch/x86/kvm/vmx/tdx.c @@ -662,7 +662,32 @@ void tdx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) bool tdx_protected_apic_has_interrupt(struct kvm_vcpu *vcpu) { - return pi_has_pending_interrupt(vcpu); + bool ret = pi_has_pending_interrupt(vcpu); + struct vcpu_tdx *tdx = to_tdx(vcpu); + + if (ret || vcpu->arch.mp_state != KVM_MP_STATE_HALTED) + return true; + + if (tdx->interrupt_disabled_hlt) + return false; + + /* + * This is for the case where the virtual interrupt is recognized, + * i.e. set in vmcs.RVI, between the STI and "HLT". KVM doesn't have + * access to RVI and the interrupt is no longer in the PID (because it + * was "recognized". It doesn't get delivered in the guest because the + * TDCALL completes before interrupts are enabled. + * + * TDX modules sets RVI while in an STI interrupt shadow. + * - TDExit(typically TDG.VP.VMCALL<HLT>) from the guest to TDX module. + * The interrupt shadow at this point is gone. + * - It knows that there is an interrupt that can be delivered + * (RVI > PPR && EFLAGS.IF=1, the other conditions of 29.2.2 don't + * matter) + * - It forwards the TDExit nevertheless, to a clueless hypervisor that + * has no way to glean either RVI or PPR. + */ + return !!xchg(&tdx->buggy_hlt_workaround, 0); } void tdx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) @@ -1104,6 +1129,17 @@ static int tdx_emulate_cpuid(struct kvm_vcpu *vcpu) return 1; } +static int tdx_emulate_hlt(struct kvm_vcpu *vcpu) +{ + struct vcpu_tdx *tdx = to_tdx(vcpu); + + /* See tdx_protected_apic_has_interrupt() to avoid heavy seamcall */ + tdx->interrupt_disabled_hlt = tdvmcall_a0_read(vcpu); + + tdvmcall_set_return_code(vcpu, TDG_VP_VMCALL_SUCCESS); + return kvm_emulate_halt_noskip(vcpu); +} + static int handle_tdvmcall(struct kvm_vcpu *vcpu) { if (tdvmcall_exit_type(vcpu)) @@ -1112,6 +1148,8 @@ static int handle_tdvmcall(struct kvm_vcpu *vcpu) switch (tdvmcall_leaf(vcpu)) { case EXIT_REASON_CPUID: return tdx_emulate_cpuid(vcpu); + case EXIT_REASON_HLT: + return tdx_emulate_hlt(vcpu); default: break; } @@ -1486,6 +1524,8 @@ void tdx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode, struct kvm_vcpu *vcpu = apic->vcpu; struct vcpu_tdx *tdx = to_tdx(vcpu); + /* See comment in tdx_protected_apic_has_interrupt(). */ + tdx->buggy_hlt_workaround = 1; /* TDX supports only posted interrupt. No lapic emulation. */ __vmx_deliver_posted_interrupt(vcpu, &tdx->pi_desc, vector); } diff --git a/arch/x86/kvm/vmx/tdx.h b/arch/x86/kvm/vmx/tdx.h index 870a0d15e073..4ddcf804c0a4 100644 --- a/arch/x86/kvm/vmx/tdx.h +++ b/arch/x86/kvm/vmx/tdx.h @@ -101,6 +101,9 @@ struct vcpu_tdx { bool host_state_need_restore; u64 msr_host_kernel_gs_base; + bool interrupt_disabled_hlt; + unsigned int buggy_hlt_workaround; + /* * Dummy to make pmu_intel not corrupt memory. * TODO: Support PMU for TDX. Future work. -- 2.25.1