From: Yunhong Jiang <yunhong.jiang@xxxxxxxxx> Add the basic VMX preemption timer functionality: checking whether the feature is supported, and setting up and clearing the VMX preemption timer. Also add a module parameter that controls whether the VMX preemption timer should be used. Signed-off-by: Yunhong Jiang <yunhong.jiang@xxxxxxxxx> --- arch/x86/include/asm/kvm_host.h | 4 ++++ arch/x86/kvm/lapic.c | 7 +++++++ arch/x86/kvm/vmx.c | 45 ++++++++++++++++++++++++++++++++++++++++- 3 files changed, 55 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 5e6b3ce7748f..8e58db20b3a4 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1006,6 +1006,10 @@ struct kvm_x86_ops { int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq, bool set); void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu); + + int (*hw_emul_timer)(struct kvm_vcpu *vcpu); + void (*set_hwemul_timer)(struct kvm_vcpu *vcpu, u64 tsc); + void (*clear_hwemul_timer)(struct kvm_vcpu *vcpu); }; struct kvm_arch_async_pf { diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index bbb5b283ff63..8908ee514f6c 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -256,6 +256,13 @@ static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic) return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_TSCDEADLINE; } +static inline int hw_emul_timer(struct kvm_lapic *apic) +{ + if (kvm_x86_ops->hw_emul_timer) + return kvm_x86_ops->hw_emul_timer(apic->vcpu); + return 0; +} + static inline int apic_lvt_nmi_mode(u32 lvt_val) { return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 9e078ff29f86..5475a7699ee5 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -110,6 +110,9 @@ module_param_named(pml, enable_pml, bool, S_IRUGO); #define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL +static bool __read_mostly hwemul_timer; 
+module_param_named(hwemul_timer, hwemul_timer, bool, S_IRUGO); + #define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD) #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE) #define KVM_VM_CR0_ALWAYS_ON \ @@ -1056,6 +1059,20 @@ static inline bool cpu_has_vmx_virtual_intr_delivery(void) SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY; } +static inline bool cpu_has_preemption_timer(void) +{ + return vmcs_config.pin_based_exec_ctrl & + PIN_BASED_VMX_PREEMPTION_TIMER; +} + +static inline int cpu_preemption_timer_multi(void) +{ + u64 vmx_msr; + + rdmsrl(MSR_IA32_VMX_MISC, vmx_msr); + return vmx_msr & VMX_MISC_PREEMPTION_TIMER_RATE_MASK; +} + static inline bool cpu_has_vmx_posted_intr(void) { return IS_ENABLED(CONFIG_X86_LOCAL_APIC) && @@ -3306,7 +3323,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) return -EIO; min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING; - opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR; + opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR | + PIN_BASED_VMX_PREEMPTION_TIMER; if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS, &_pin_based_exec_control) < 0) return -EIO; @@ -4779,6 +4797,8 @@ static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx) if (!kvm_vcpu_apicv_active(&vmx->vcpu)) pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR; + /* Enable the preemption timer dynamically */ + pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER; return pin_based_exec_ctrl; } @@ -10650,6 +10670,25 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu, return X86EMUL_CONTINUE; } +static int vmx_hwemul_timer(struct kvm_vcpu *vcpu) +{ + return hwemul_timer && cpu_has_preemption_timer(); +} + +static void vmx_set_hwemul_timer(struct kvm_vcpu *vcpu, u64 target_tsc) +{ + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, + target_tsc >> cpu_preemption_timer_multi()); + vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL, + PIN_BASED_VMX_PREEMPTION_TIMER); +} + +static void vmx_clear_hwemul_timer(struct kvm_vcpu *vcpu) +{ + 
vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL, + PIN_BASED_VMX_PREEMPTION_TIMER); +} + static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu) { if (ple_gap) @@ -11018,6 +11057,10 @@ static struct kvm_x86_ops vmx_x86_ops = { .pmu_ops = &intel_pmu_ops, .update_pi_irte = vmx_update_pi_irte, + + .hw_emul_timer = vmx_hwemul_timer, + .set_hwemul_timer = vmx_set_hwemul_timer, + .clear_hwemul_timer = vmx_clear_hwemul_timer, }; static int __init vmx_init(void) -- 1.8.3.1 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html