Re: [PATCH RFC 2/2] KVM: x86: Support using the VMX preemption timer for APIC Timer periodic/oneshot mode

Radim Krčmář <rkrcmar@xxxxxxxxxx> · Tue, 11 Oct 2016 17:06:41 +0200

2016-10-11 20:17+0800, Wanpeng Li:
> From: Wanpeng Li <wanpeng.li@xxxxxxxxxxx>
> 
> Most windows guests still utilize APIC Timer periodic/oneshot mode 
> instead of tsc-deadline mode, and the APIC Timer periodic/oneshot 
> mode are still emulated by high overhead hrtimer on host. This patch 
> converts the expected expire time of the periodic/oneshot mode to
> guest deadline tsc in order to leverage VMX preemption timer logic 
> for APIC Timer tsc-deadline mode.
> 
> Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> Cc: Radim Krčmář <rkrcmar@xxxxxxxxxx>
> Cc: Yunhong Jiang <yunhong.jiang@xxxxxxxxx>
> Signed-off-by: Wanpeng Li <wanpeng.li@xxxxxxxxxxx>
> ---
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> @@ -1101,13 +1101,20 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic)
>  		apic->lapic_timer.period == 0)
>  		return 0;
>  
> +	if (kvm_lapic_hv_timer_in_use(apic->vcpu)) {
> +		u64 tscl = rdtsc();
> +
> +		tmcct = apic->lapic_timer.tscdeadline -
> +			kvm_read_l1_tsc(apic->vcpu, tscl);

Yes, this won't work.  The easiest way to return a less bogus TMCCT
would be remember the timeout when setting up the timer and replace
hrtimer_get_remaining() with it -- our deliver method shouldn't change
the expiration time.

> +	} else {
> +		remaining = hrtimer_get_remaining(&apic->lapic_timer.timer);
> +		if (ktime_to_ns(remaining) < 0)
> +			remaining = ktime_set(0, 0);
> +
> +		ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period);
> +		tmcct = div64_u64(ns,
> +				 (APIC_BUS_CYCLE_NS * apic->divide_count));
> +	}
>  
>  	return tmcct;
>  }
> @@ -1400,52 +1407,65 @@ bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
>  }
>  EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use);
>  
> -static void cancel_hv_tscdeadline(struct kvm_lapic *apic)
> +static void cancel_hv_timer(struct kvm_lapic *apic)
>  {
>  	kvm_x86_ops->cancel_hv_timer(apic->vcpu);
>  	apic->lapic_timer.hv_timer_in_use = false;
>  }
>  
> +static bool start_hv_timer(struct kvm_lapic *apic)
>  {
> +	u64 tscdeadline;
>  
> +	if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
> +		u64 tscl = rdtsc();
>  
> +		apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
> +			* APIC_BUS_CYCLE_NS * apic->divide_count;
> +		apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
> +			nsec_to_cycles(apic->vcpu, apic->lapic_timer.period);

start_hv_timer() is called from kvm_lapic_switch_to_hv_timer(), which
can happen mid-period.  The worst case is that the timer will never
fire, because we always convert back and forth.  You have to compute the
equivalent deadline only once, and carry it around.

> +	}
> +
> +	tscdeadline = apic->lapic_timer.tscdeadline;
>  
>  	if (atomic_read(&apic->lapic_timer.pending) ||
>  		kvm_x86_ops->set_hv_timer(apic->vcpu, tscdeadline)) {
>  		if (apic->lapic_timer.hv_timer_in_use)
> +			cancel_hv_timer(apic);
>  	} else {
>  		apic->lapic_timer.hv_timer_in_use = true;
>  		hrtimer_cancel(&apic->lapic_timer.timer);
>  
>  		/* In case the sw timer triggered in the window */
>  		if (atomic_read(&apic->lapic_timer.pending))
> +			cancel_hv_timer(apic);
>  	}
>  	trace_kvm_hv_timer_state(apic->vcpu->vcpu_id,
>  			apic->lapic_timer.hv_timer_in_use);
>  	return apic->lapic_timer.hv_timer_in_use;
>  }
>  
> +void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
> +{
> +	struct kvm_lapic *apic = vcpu->arch.apic;
> +
> +	WARN_ON(!apic->lapic_timer.hv_timer_in_use);
> +	WARN_ON(swait_active(&vcpu->wq));
> +	cancel_hv_timer(apic);
> +	apic_timer_expired(apic);
> +
> +	if (apic_lvtt_period(apic))
> +		start_hv_timer(apic);
> +}
> +EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);
> +
>  void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu)
>  {
>  	struct kvm_lapic *apic = vcpu->arch.apic;
>  
>  	WARN_ON(apic->lapic_timer.hv_timer_in_use);
>  
> -	if (apic_lvtt_tscdeadline(apic))
> -		start_hv_tscdeadline(apic);
> +	start_hv_timer(apic);
>  }
>  EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_hv_timer);
>  
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html