Make the runtime disabled mwait/hlt/pause/cstate exits flags vCPU scope to allow finer-grained, per-vCPU control. The VM-scoped control is only allowed before vCPUs are created, thus preserving the existing behavior is a simple matter of snapshotting the flags at vCPU creation. Signed-off-by: Kechen Lu <kechenl@xxxxxxxxxx> Suggested-by: Sean Christopherson <seanjc@xxxxxxxxxx> Reviewed-by: Sean Christopherson <seanjc@xxxxxxxxxx> --- arch/x86/include/asm/kvm_host.h | 5 +++++ arch/x86/kvm/cpuid.c | 4 ++-- arch/x86/kvm/lapic.c | 7 +++---- arch/x86/kvm/svm/nested.c | 4 ++-- arch/x86/kvm/svm/svm.c | 12 ++++++------ arch/x86/kvm/vmx/vmx.c | 16 ++++++++-------- arch/x86/kvm/x86.c | 6 +++++- arch/x86/kvm/x86.h | 16 ++++++++-------- 8 files changed, 39 insertions(+), 31 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 9217bd6cf0d1..573a39bf7a84 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -924,6 +924,11 @@ struct kvm_vcpu_arch { #if IS_ENABLED(CONFIG_HYPERV) hpa_t hv_root_tdp; #endif + + bool mwait_in_guest; + bool hlt_in_guest; + bool pause_in_guest; + bool cstate_in_guest; }; struct kvm_lpage_info { diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index de6d44e07e34..f013ff4f49c5 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -245,8 +245,8 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e best->ebx = xstate_required_size(vcpu->arch.xcr0, true); best = __kvm_find_kvm_cpuid_features(vcpu, entries, nent); - if (kvm_hlt_in_guest(vcpu->kvm) && best && - (best->eax & (1 << KVM_FEATURE_PV_UNHALT))) + if (kvm_hlt_in_guest(vcpu) && + best && (best->eax & (1 << KVM_FEATURE_PV_UNHALT))) best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT); if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT)) { diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 0e68b4c937fc..9e29d658a8c2 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -147,14 +147,13 @@ static inline u32 kvm_x2apic_id(struct kvm_lapic *apic) static bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu) { return pi_inject_timer && kvm_vcpu_apicv_active(vcpu) && - (kvm_mwait_in_guest(vcpu->kvm) || kvm_hlt_in_guest(vcpu->kvm)); + (kvm_mwait_in_guest(vcpu) || kvm_hlt_in_guest(vcpu)); } bool kvm_can_use_hv_timer(struct kvm_vcpu *vcpu) { - return kvm_x86_ops.set_hv_timer - && !(kvm_mwait_in_guest(vcpu->kvm) || - kvm_can_post_timer_interrupt(vcpu)); + return kvm_x86_ops.set_hv_timer && + !(kvm_mwait_in_guest(vcpu) || kvm_can_post_timer_interrupt(vcpu)); } EXPORT_SYMBOL_GPL(kvm_can_use_hv_timer); diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index ba7cd26f438f..f143ec757467 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -675,7 +675,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm) pause_count12 = svm->pause_filter_enabled ? svm->nested.ctl.pause_filter_count : 0; pause_thresh12 = svm->pause_threshold_enabled ? svm->nested.ctl.pause_filter_thresh : 0; - if (kvm_pause_in_guest(svm->vcpu.kvm)) { + if (kvm_pause_in_guest(&svm->vcpu)) { /* use guest values since host doesn't intercept PAUSE */ vmcb02->control.pause_filter_count = pause_count12; vmcb02->control.pause_filter_thresh = pause_thresh12; @@ -951,7 +951,7 @@ int nested_svm_vmexit(struct vcpu_svm *svm) vmcb12->control.event_inj = svm->nested.ctl.event_inj; vmcb12->control.event_inj_err = svm->nested.ctl.event_inj_err; - if (!kvm_pause_in_guest(vcpu->kvm)) { + if (!kvm_pause_in_guest(vcpu)) { vmcb01->control.pause_filter_count = vmcb02->control.pause_filter_count; vmcb_mark_dirty(vmcb01, VMCB_INTERCEPTS); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 87da90360bc7..b32987f54ace 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -921,7 +921,7 @@ static void grow_ple_window(struct kvm_vcpu *vcpu) struct vmcb_control_area *control = &svm->vmcb->control; int old = control->pause_filter_count; - if (kvm_pause_in_guest(vcpu->kvm)) + if (kvm_pause_in_guest(vcpu)) return; control->pause_filter_count = __grow_ple_window(old, @@ -942,7 +942,7 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu) struct vmcb_control_area *control = &svm->vmcb->control; int old = control->pause_filter_count; - if (kvm_pause_in_guest(vcpu->kvm)) + if (kvm_pause_in_guest(vcpu)) return; control->pause_filter_count = @@ -1136,12 +1136,12 @@ static void init_vmcb(struct kvm_vcpu *vcpu) svm_set_intercept(svm, INTERCEPT_RDPRU); svm_set_intercept(svm, INTERCEPT_RSM); - if (!kvm_mwait_in_guest(vcpu->kvm)) { + if (!kvm_mwait_in_guest(vcpu)) { svm_set_intercept(svm, INTERCEPT_MONITOR); svm_set_intercept(svm, INTERCEPT_MWAIT); } - if (!kvm_hlt_in_guest(vcpu->kvm)) + if (!kvm_hlt_in_guest(vcpu)) svm_set_intercept(svm, INTERCEPT_HLT); control->iopm_base_pa = __sme_set(iopm_base); @@ -1185,7 +1185,7 @@ static void init_vmcb(struct kvm_vcpu *vcpu) svm->nested.vmcb12_gpa = INVALID_GPA; svm->nested.last_vmcb12_gpa = INVALID_GPA; - if (!kvm_pause_in_guest(vcpu->kvm)) { + if (!kvm_pause_in_guest(vcpu)) { control->pause_filter_count = pause_filter_count; if (pause_filter_thresh) control->pause_filter_thresh = pause_filter_thresh; @@ -4269,7 +4269,7 @@ static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu) static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu) { - if (!kvm_pause_in_guest(vcpu->kvm)) + if (!kvm_pause_in_guest(vcpu)) shrink_ple_window(vcpu); } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 553dd2317b9c..f24c9a357f70 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1597,7 +1597,7 @@ static void vmx_clear_hlt(struct kvm_vcpu *vcpu) * then the instruction is already executing and RIP has already been * advanced. */ - if (kvm_hlt_in_guest(vcpu->kvm) && + if (kvm_hlt_in_guest(vcpu) && vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT) vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE); } @@ -4212,10 +4212,10 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx) exec_control |= CPU_BASED_CR3_STORE_EXITING | CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_INVLPG_EXITING; - if (kvm_mwait_in_guest(vmx->vcpu.kvm)) + if (kvm_mwait_in_guest(&vmx->vcpu)) exec_control &= ~(CPU_BASED_MWAIT_EXITING | CPU_BASED_MONITOR_EXITING); - if (kvm_hlt_in_guest(vmx->vcpu.kvm)) + if (kvm_hlt_in_guest(&vmx->vcpu)) exec_control &= ~CPU_BASED_HLT_EXITING; return exec_control; } @@ -4294,7 +4294,7 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) } if (!enable_unrestricted_guest) exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; - if (kvm_pause_in_guest(vmx->vcpu.kvm)) + if (kvm_pause_in_guest(&vmx->vcpu)) exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; if (!kvm_vcpu_apicv_active(vcpu)) exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT | @@ -4397,7 +4397,7 @@ static void init_vmcs(struct vcpu_vmx *vmx) vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc))); } - if (!kvm_pause_in_guest(vmx->vcpu.kvm)) { + if (!kvm_pause_in_guest(&vmx->vcpu)) { vmcs_write32(PLE_GAP, ple_gap); vmx->ple_window = ple_window; vmx->ple_window_dirty = true; @@ -5562,7 +5562,7 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu) */ static int handle_pause(struct kvm_vcpu *vcpu) { - if (!kvm_pause_in_guest(vcpu->kvm)) + if (!kvm_pause_in_guest(vcpu)) grow_ple_window(vcpu); /* @@ -7059,7 +7059,7 @@ static int vmx_vcpu_create(struct kvm_vcpu *vcpu) vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW); vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW); vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW); - if (kvm_cstate_in_guest(vcpu->kvm)) { + if (kvm_cstate_in_guest(vcpu)) { vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C1_RES, MSR_TYPE_R); vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C3_RESIDENCY, MSR_TYPE_R); vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R); @@ -7597,7 +7597,7 @@ static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu) static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu) { - if (!kvm_pause_in_guest(vcpu->kvm)) + if (!kvm_pause_in_guest(vcpu)) shrink_ple_window(vcpu); } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3ac6329e6d43..b419b258ed90 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -11355,6 +11355,10 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) #if IS_ENABLED(CONFIG_HYPERV) vcpu->arch.hv_root_tdp = INVALID_PAGE; #endif + vcpu->arch.mwait_in_guest = vcpu->kvm->arch.mwait_in_guest; + vcpu->arch.hlt_in_guest = vcpu->kvm->arch.hlt_in_guest; + vcpu->arch.pause_in_guest = vcpu->kvm->arch.pause_in_guest; + vcpu->arch.cstate_in_guest = vcpu->kvm->arch.cstate_in_guest; r = static_call(kvm_x86_vcpu_create)(vcpu); if (r) @@ -12539,7 +12543,7 @@ bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu) vcpu->arch.exception.pending)) return false; - if (kvm_hlt_in_guest(vcpu->kvm) && !kvm_can_deliver_async_pf(vcpu)) + if (kvm_hlt_in_guest(vcpu) && !kvm_can_deliver_async_pf(vcpu)) return false; /* diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 588792f00334..a59b73e11726 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -324,24 +324,24 @@ static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec) __rem; \ }) -static inline bool kvm_mwait_in_guest(struct kvm *kvm) +static inline bool kvm_mwait_in_guest(struct kvm_vcpu *vcpu) { - return kvm->arch.mwait_in_guest; + return vcpu->arch.mwait_in_guest; } -static inline bool kvm_hlt_in_guest(struct kvm *kvm) +static inline bool kvm_hlt_in_guest(struct kvm_vcpu *vcpu) { - return kvm->arch.hlt_in_guest; + return vcpu->arch.hlt_in_guest; } -static inline bool kvm_pause_in_guest(struct kvm *kvm) +static inline bool kvm_pause_in_guest(struct kvm_vcpu *vcpu) { - return kvm->arch.pause_in_guest; + return vcpu->arch.pause_in_guest; } -static inline bool kvm_cstate_in_guest(struct kvm *kvm) +static inline bool kvm_cstate_in_guest(struct kvm_vcpu *vcpu) { - return kvm->arch.cstate_in_guest; + return vcpu->arch.cstate_in_guest; } enum kvm_intr_type { -- 2.32.0