On 20/05/2016 03:45, Yunhong Jiang wrote:
> From: Yunhong Jiang <yunhong.jiang@xxxxxxxxx>
>
> Adding the basic VMX preemption timer functionality, including checking
> if the feature is supported, setup/clean the VMX preemption timer. Also
> adds a parameter to state if the VMX preemption timer should be utilized.
>
> Signed-off-by: Yunhong Jiang <yunhong.jiang@xxxxxxxxx>
> ---
> arch/x86/include/asm/kvm_host.h | 4 ++++
> arch/x86/kvm/lapic.c | 7 +++++++
> arch/x86/kvm/vmx.c | 45 ++++++++++++++++++++++++++++++++++++++++-
> 3 files changed, 55 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 5e6b3ce7748f..8e58db20b3a4 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1006,6 +1006,10 @@ struct kvm_x86_ops {
> int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq,
> uint32_t guest_irq, bool set);
> void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu);
> +
> + int (*hw_emul_timer)(struct kvm_vcpu *vcpu);
> + void (*set_hwemul_timer)(struct kvm_vcpu *vcpu, u64 tsc);
> + void (*clear_hwemul_timer)(struct kvm_vcpu *vcpu);
> };
>
> struct kvm_arch_async_pf {
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index bbb5b283ff63..8908ee514f6c 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -256,6 +256,13 @@ static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
> return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_TSCDEADLINE;
> }
>
> +static inline int hw_emul_timer(struct kvm_lapic *apic)
> +{
> + if (kvm_x86_ops->hw_emul_timer)
> + return kvm_x86_ops->hw_emul_timer(apic->vcpu);
> + return 0;
> +}
> +
> static inline int apic_lvt_nmi_mode(u32 lvt_val)
> {
> return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 9e078ff29f86..5475a7699ee5 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -110,6 +110,9 @@ module_param_named(pml, enable_pml, bool, S_IRUGO);
>
> #define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL
>
> +static bool __read_mostly hwemul_timer;
> +module_param_named(hwemul_timer, hwemul_timer, bool, S_IRUGO);
> +
> #define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD)
> #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE)
> #define KVM_VM_CR0_ALWAYS_ON \
> @@ -1056,6 +1059,20 @@ static inline bool cpu_has_vmx_virtual_intr_delivery(void)
> SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
> }
>
> +static inline bool cpu_has_preemption_timer(void)
> +{
> + return vmcs_config.pin_based_exec_ctrl &
> + PIN_BASED_VMX_PREEMPTION_TIMER;
> +}
> +
> +static inline int cpu_preemption_timer_multi(void)
> +{
> + u64 vmx_msr;
> +
> + rdmsrl(MSR_IA32_VMX_MISC, vmx_msr);
> + return vmx_msr & VMX_MISC_PREEMPTION_TIMER_RATE_MASK;
> +}
> +
> static inline bool cpu_has_vmx_posted_intr(void)
> {
> return IS_ENABLED(CONFIG_X86_LOCAL_APIC) &&
> @@ -3306,7 +3323,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
> return -EIO;
>
> min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
> - opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR;
> + opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR |
> + PIN_BASED_VMX_PREEMPTION_TIMER;
> if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
> &_pin_based_exec_control) < 0)
> return -EIO;
> @@ -4779,6 +4797,8 @@ static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
>
> if (!kvm_vcpu_apicv_active(&vmx->vcpu))
> pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
> + /* Enable the preemption timer dynamically */
> + pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
> return pin_based_exec_ctrl;
> }
>
> @@ -10650,6 +10670,25 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu,
> return X86EMUL_CONTINUE;
> }
>
> +static int vmx_hwemul_timer(struct kvm_vcpu *vcpu)
> +{
> + return hwemul_timer && cpu_has_preemption_timer();
> +}

Please clear the vmx_x86_ops members instead if the preemption timer is
not usable. Then you can check kvm_x86_ops->set_hwemul_timer and
kvm_x86_ops->clear_hwemul_timer instead of calling this function.

For what it's worth, I prefer "vmx_{set,cancel}_hv_timer" instead.

> +static void vmx_set_hwemul_timer(struct kvm_vcpu *vcpu, u64 target_tsc)

This is not a target_tsc, it is a delta_tsc.

> +{
> + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE,
> + target_tsc >> cpu_preemption_timer_multi());

Please cache the value of cpu_preemption_timer_multi(); rdmsr is slow.

Thanks,

Paolo

> + vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL,
> + PIN_BASED_VMX_PREEMPTION_TIMER);
> +}
> +
> +static void vmx_clear_hwemul_timer(struct kvm_vcpu *vcpu)
> +{
> + vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL,
> + PIN_BASED_VMX_PREEMPTION_TIMER);
> +}
> +
> static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
> {
> if (ple_gap)
> @@ -11018,6 +11057,10 @@ static struct kvm_x86_ops vmx_x86_ops = {
> .pmu_ops = &intel_pmu_ops,
>
> .update_pi_irte = vmx_update_pi_irte,
> +
> + .hw_emul_timer = vmx_hwemul_timer,
> + .set_hwemul_timer = vmx_set_hwemul_timer,
> + .clear_hwemul_timer = vmx_clear_hwemul_timer,
> };
>
> static int __init vmx_init(void)
> --
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html