Model inactive VTL vCPUs' behaviour with a new MP state. Inactive VTLs are in an artificial halt state. They enter this state in response to invoking HvCallVtlCall or HvCallVtlReturn. User-space, which is VTL aware, can process the hypercall, and set the vCPU in KVM_MP_STATE_HV_INACTIVE_VTL. When a vCPU is run in this state it'll block until a wakeup event is received. The rules of what constitutes an event are analogous to halt's, except that VTLs ignore RFLAGS.IF. When a wakeup event is registered, KVM will exit to user-space with a KVM_SYSTEM_EVENT exit and KVM_SYSTEM_EVENT_WAKEUP event type. User-space is responsible for deciding whether the event has precedence over the active VTL and will switch the vCPU to KVM_MP_STATE_RUNNABLE before resuming execution on it. Running a KVM_MP_STATE_HV_INACTIVE_VTL vCPU with pending events will return immediately to user-space. Note that by re-using the readily available halt infrastructure in KVM_RUN, KVM_MP_STATE_HV_INACTIVE_VTL correctly handles (or disables) virtualisation features like the VMX preemption timer or APICv before blocking. Suggested-by: Maxim Levitsky <mlevitsk@xxxxxxxxxx> Signed-off-by: Nicolas Saenz Julienne <nsaenz@xxxxxxxxxx> --- I do recall Sean mentioning that using MP states for this might have unexpected side-effects. But it was in the context of introducing a broader `HALTED_USERSPACE` style state. I believe that by narrowing down the MP state's semantics to the specifics of inactive VTLs -- alternatively, we could change RFLAGS.IF in user-space before updating the MP state -- we cement this as a VSM-only API as well as limit the ambiguity about the guest/vCPU's state upon entering this execution mode. 
Documentation/virt/kvm/api.rst | 19 +++++++++++++++++++ arch/x86/kvm/hyperv.h | 8 ++++++++ arch/x86/kvm/svm/svm.c | 7 ++++++- arch/x86/kvm/vmx/vmx.c | 7 ++++++- arch/x86/kvm/x86.c | 16 +++++++++++++++- include/uapi/linux/kvm.h | 1 + 6 files changed, 55 insertions(+), 3 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 17893b330b76f..e664c54a13b04 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -1517,6 +1517,8 @@ Possible values are: [s390] KVM_MP_STATE_SUSPENDED the vcpu is in a suspend state and is waiting for a wakeup event [arm64] + KVM_MP_STATE_HV_INACTIVE_VTL the vcpu is an inactive VTL and is waiting for + a wakeup event [x86] ========================== =============================================== On x86, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an @@ -1559,6 +1561,23 @@ KVM_MP_STATE_RUNNABLE which reflect if the vcpu is paused or not. On LoongArch, only the KVM_MP_STATE_RUNNABLE state is used to reflect whether the vcpu is runnable. +For x86: +^^^^^^^^ + +KVM_MP_STATE_HV_INACTIVE_VTL is only available to a VM if Hyper-V's +HV_ACCESS_VSM CPUID is exposed to the guest. This processor state models the +behavior of an inactive VTL and should only be used for this purpose. A +userspace process should only switch a vCPU into this MP state in response to a +HvCallVtlCall, HvCallVtlReturn. + +If a vCPU is in KVM_MP_STATE_HV_INACTIVE_VTL, KVM will emulate the +architectural execution of a HLT instruction with the caveat that RFLAGS.IF is +ignored when deciding whether to wake up (TLFS 12.12.2.1). If a wakeup is +recognized, KVM will exit to userspace with a KVM_SYSTEM_EVENT exit, where the +event type is KVM_SYSTEM_EVENT_WAKEUP. Userspace has the responsibility to +switch the vCPU back into KVM_MP_STATE_RUNNABLE state. Calling KVM_RUN on a +KVM_MP_STATE_HV_INACTIVE_VTL vCPU with pending events will exit immediately. 
+ 4.39 KVM_SET_MP_STATE --------------------- diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h index d007d2203e0e4..d42fe3f85b002 100644 --- a/arch/x86/kvm/hyperv.h +++ b/arch/x86/kvm/hyperv.h @@ -271,6 +271,10 @@ static inline bool kvm_hv_cpuid_vsm_enabled(struct kvm_vcpu *vcpu) return hv_vcpu && (hv_vcpu->cpuid_cache.features_ebx & HV_ACCESS_VSM); } +static inline bool kvm_hv_vcpu_is_idle_vtl(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.mp_state == KVM_MP_STATE_HV_INACTIVE_VTL; +} #else /* CONFIG_KVM_HYPERV */ static inline void kvm_hv_setup_tsc_page(struct kvm *kvm, struct pvclock_vcpu_time_info *hv_clock) {} @@ -332,6 +336,10 @@ static inline bool kvm_hv_cpuid_vsm_enabled(struct kvm_vcpu *vcpu) { return false; } +static inline bool kvm_hv_vcpu_is_idle_vtl(struct kvm_vcpu *vcpu) +{ + return false; +} #endif /* CONFIG_KVM_HYPERV */ #endif /* __ARCH_X86_KVM_HYPERV_H__ */ diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 296c524988f95..9671191fef4ea 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -49,6 +49,7 @@ #include "svm.h" #include "svm_ops.h" +#include "hyperv.h" #include "kvm_onhyperv.h" #include "svm_onhyperv.h" @@ -3797,6 +3798,10 @@ bool svm_interrupt_blocked(struct kvm_vcpu *vcpu) if (!gif_set(svm)) return true; + /* + * The Hyper-V TLFS states that RFLAGS.IF is ignored when deciding + * whether to block interrupts targeted at inactive VTLs. + */ if (is_guest_mode(vcpu)) { /* As long as interrupts are being delivered... 
*/ if ((svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK) @@ -3808,7 +3813,7 @@ bool svm_interrupt_blocked(struct kvm_vcpu *vcpu) if (nested_exit_on_intr(svm)) return false; } else { - if (!svm_get_if_flag(vcpu)) + if (!svm_get_if_flag(vcpu) && !kvm_hv_vcpu_is_idle_vtl(vcpu)) return true; } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index b3c83c06f8265..ac0682fece604 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -5057,7 +5057,12 @@ bool vmx_interrupt_blocked(struct kvm_vcpu *vcpu) if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) return false; - return !(vmx_get_rflags(vcpu) & X86_EFLAGS_IF) || + /* + * The Hyper-V TLFS states that RFLAGS.IF is ignored when deciding + * whether to block interrupts targeted at inactive VTLs. + */ + return (!(vmx_get_rflags(vcpu) & X86_EFLAGS_IF) && + !kvm_hv_vcpu_is_idle_vtl(vcpu)) || (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)); } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8c9e4281d978d..a6e2312ccb68f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -134,6 +134,7 @@ static int kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu); static int __set_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2); static void __get_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2); +static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu); static DEFINE_MUTEX(vendor_module_lock); struct kvm_x86_ops kvm_x86_ops __read_mostly; @@ -11176,7 +11177,8 @@ static inline int vcpu_block(struct kvm_vcpu *vcpu) kvm_lapic_switch_to_sw_timer(vcpu); kvm_vcpu_srcu_read_unlock(vcpu); - if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) + if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED || + kvm_hv_vcpu_is_idle_vtl(vcpu)) kvm_vcpu_halt(vcpu); else kvm_vcpu_block(vcpu); @@ -11218,6 +11220,7 @@ static inline int vcpu_block(struct kvm_vcpu *vcpu) vcpu->arch.apf.halted = false; break; case KVM_MP_STATE_INIT_RECEIVED: + case 
KVM_MP_STATE_HV_INACTIVE_VTL: break; default: WARN_ON_ONCE(1); @@ -11264,6 +11267,13 @@ static int vcpu_run(struct kvm_vcpu *vcpu) if (kvm_cpu_has_pending_timer(vcpu)) kvm_inject_pending_timer_irqs(vcpu); + if (kvm_hv_vcpu_is_idle_vtl(vcpu) && kvm_vcpu_has_events(vcpu)) { + r = 0; + vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; + vcpu->run->system_event.type = KVM_SYSTEM_EVENT_WAKEUP; + break; + } + if (dm_request_for_irq_injection(vcpu) && kvm_vcpu_ready_for_interrupt_injection(vcpu)) { r = 0; @@ -11703,6 +11713,10 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, goto out; break; + case KVM_MP_STATE_HV_INACTIVE_VTL: + if (is_guest_mode(vcpu) || !kvm_hv_cpuid_vsm_enabled(vcpu)) + goto out; + break; case KVM_MP_STATE_RUNNABLE: break; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index fbdee8d754595..f4864e6907e0b 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -564,6 +564,7 @@ struct kvm_vapic_addr { #define KVM_MP_STATE_LOAD 8 #define KVM_MP_STATE_AP_RESET_HOLD 9 #define KVM_MP_STATE_SUSPENDED 10 +#define KVM_MP_STATE_HV_INACTIVE_VTL 11 struct kvm_mp_state { __u32 mp_state; -- 2.40.1