Re: [RFC PATCH 2/5] Utilize the vmx preemption timer

Paolo Bonzini <pbonzini@xxxxxxxxxx> · Fri, 20 May 2016 11:45:56 +0200

On 20/05/2016 03:45, Yunhong Jiang wrote:
> From: Yunhong Jiang <yunhong.jiang@xxxxxxxxx>
> 
> Add the basic VMX preemption timer functionality: checking whether the
> feature is supported, and setting up and clearing the VMX preemption
> timer. Also add a module parameter that controls whether the VMX
> preemption timer should be used.
> 
> Signed-off-by: Yunhong Jiang <yunhong.jiang@xxxxxxxxx>
> ---
>  arch/x86/include/asm/kvm_host.h |  4 ++++
>  arch/x86/kvm/lapic.c            |  7 +++++++
>  arch/x86/kvm/vmx.c              | 45 ++++++++++++++++++++++++++++++++++++++++-
>  3 files changed, 55 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 5e6b3ce7748f..8e58db20b3a4 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1006,6 +1006,10 @@ struct kvm_x86_ops {
>  	int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq,
>  			      uint32_t guest_irq, bool set);
>  	void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu);
> +
> +	int (*hw_emul_timer)(struct kvm_vcpu *vcpu);
> +	void (*set_hwemul_timer)(struct kvm_vcpu *vcpu, u64 tsc);
> +	void (*clear_hwemul_timer)(struct kvm_vcpu *vcpu);
>  };
>  
>  struct kvm_arch_async_pf {
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index bbb5b283ff63..8908ee514f6c 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -256,6 +256,13 @@ static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
>  	return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_TSCDEADLINE;
>  }
>  
> +static inline int hw_emul_timer(struct kvm_lapic *apic)
> +{
> +	if (kvm_x86_ops->hw_emul_timer)
> +		return kvm_x86_ops->hw_emul_timer(apic->vcpu);
> +	return 0;
> +}
> +
>  static inline int apic_lvt_nmi_mode(u32 lvt_val)
>  {
>  	return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 9e078ff29f86..5475a7699ee5 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -110,6 +110,9 @@ module_param_named(pml, enable_pml, bool, S_IRUGO);
>  
>  #define KVM_VMX_TSC_MULTIPLIER_MAX     0xffffffffffffffffULL
>  
> +static bool __read_mostly hwemul_timer;
> +module_param_named(hwemul_timer, hwemul_timer, bool, S_IRUGO);
> +
>  #define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD)
>  #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE)
>  #define KVM_VM_CR0_ALWAYS_ON						\
> @@ -1056,6 +1059,20 @@ static inline bool cpu_has_vmx_virtual_intr_delivery(void)
>  		SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
>  }
>  
> +static inline bool cpu_has_preemption_timer(void)
> +{
> +	return vmcs_config.pin_based_exec_ctrl &
> +		PIN_BASED_VMX_PREEMPTION_TIMER;
> +}
> +
> +static inline int cpu_preemption_timer_multi(void)
> +{
> +	u64 vmx_msr;
> +
> +	rdmsrl(MSR_IA32_VMX_MISC, vmx_msr);
> +	return vmx_msr & VMX_MISC_PREEMPTION_TIMER_RATE_MASK;
> +}
> +
>  static inline bool cpu_has_vmx_posted_intr(void)
>  {
>  	return IS_ENABLED(CONFIG_X86_LOCAL_APIC) &&
> @@ -3306,7 +3323,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
>  		return -EIO;
>  
>  	min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
> -	opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR;
> +	opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR |
> +		 PIN_BASED_VMX_PREEMPTION_TIMER;
>  	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
>  				&_pin_based_exec_control) < 0)
>  		return -EIO;
> @@ -4779,6 +4797,8 @@ static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
>  
>  	if (!kvm_vcpu_apicv_active(&vmx->vcpu))
>  		pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
> +	/* Enable the preemption timer dynamically */
> +	pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
>  	return pin_based_exec_ctrl;
>  }
>  
> @@ -10650,6 +10670,25 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu,
>  	return X86EMUL_CONTINUE;
>  }
>  
> +static int vmx_hwemul_timer(struct kvm_vcpu *vcpu)
> +{
> +	return hwemul_timer && cpu_has_preemption_timer();
> +}

If the preemption timer is not usable, please clear these vmx_x86_ops
members instead.  Callers can then check whether
kvm_x86_ops->set_hwemul_timer and kvm_x86_ops->clear_hwemul_timer are
NULL instead of calling this function.

For what it's worth, I prefer "vmx_{set,cancel}_hv_timer" instead.

> +static void vmx_set_hwemul_timer(struct kvm_vcpu *vcpu, u64 target_tsc)

This is not a target_tsc, it is a delta_tsc.

> +{
> +	vmcs_write32(VMX_PREEMPTION_TIMER_VALUE,
> +			target_tsc >> cpu_preemption_timer_multi());

Please cache the value of cpu_preemption_timer_multi(); rdmsr is slow.

Thanks,

Paolo

> +	vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL,
> +			PIN_BASED_VMX_PREEMPTION_TIMER);
> +}
> +
> +static void vmx_clear_hwemul_timer(struct kvm_vcpu *vcpu)
> +{
> +	vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL,
> +			PIN_BASED_VMX_PREEMPTION_TIMER);
> +}
> +
>  static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
>  {
>  	if (ple_gap)
> @@ -11018,6 +11057,10 @@ static struct kvm_x86_ops vmx_x86_ops = {
>  	.pmu_ops = &intel_pmu_ops,
>  
>  	.update_pi_irte = vmx_update_pi_irte,
> +
> +	.hw_emul_timer = vmx_hwemul_timer,
> +	.set_hwemul_timer = vmx_set_hwemul_timer,
> +	.clear_hwemul_timer = vmx_clear_hwemul_timer,
>  };
>  
>  static int __init vmx_init(void)
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html