2016-10-11 20:17+0800, Wanpeng Li: > From: Wanpeng Li <wanpeng.li@xxxxxxxxxxx> > > Most windows guests still utilize APIC Timer periodic/oneshot mode > instead of tsc-deadline mode, and the APIC Timer periodic/oneshot > mode are still emulated by high overhead hrtimer on host. This patch > converts the expected expire time of the periodic/oneshot mode to > guest deadline tsc in order to leverage VMX preemption timer logic > for APIC Timer tsc-deadline mode. > > Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx> > Cc: Radim Krčmář <rkrcmar@xxxxxxxxxx> > Cc: Yunhong Jiang <yunhong.jiang@xxxxxxxxx> > Signed-off-by: Wanpeng Li <wanpeng.li@xxxxxxxxxxx> > --- > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c > @@ -1101,13 +1101,20 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic) > apic->lapic_timer.period == 0) > return 0; > > + if (kvm_lapic_hv_timer_in_use(apic->vcpu)) { > + u64 tscl = rdtsc(); > + > + tmcct = apic->lapic_timer.tscdeadline - > + kvm_read_l1_tsc(apic->vcpu, tscl); Yes, this won't work. The easiest way to return a less bogus TMCCT would be remember the timeout when setting up the timer and replace hrtimer_get_remaining() with it -- our deliver method shouldn't change the expiration time. > + } else { > + remaining = hrtimer_get_remaining(&apic->lapic_timer.timer); > + if (ktime_to_ns(remaining) < 0) > + remaining = ktime_set(0, 0); > + > + ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period); > + tmcct = div64_u64(ns, > + (APIC_BUS_CYCLE_NS * apic->divide_count)); > + } > > return tmcct; > } > @@ -1400,52 +1407,65 @@ bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu) > } > EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use); > > -static void cancel_hv_tscdeadline(struct kvm_lapic *apic) > +static void cancel_hv_timer(struct kvm_lapic *apic) > { > kvm_x86_ops->cancel_hv_timer(apic->vcpu); > apic->lapic_timer.hv_timer_in_use = false; > } > > +static bool start_hv_timer(struct kvm_lapic *apic) > { > + u64 tscdeadline; > > + if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) { > + u64 tscl = rdtsc(); > > + apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT) > + * APIC_BUS_CYCLE_NS * apic->divide_count; > + apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) + > + nsec_to_cycles(apic->vcpu, apic->lapic_timer.period); start_hv_timer() is called from kvm_lapic_switch_to_hv_timer(), which can happen mid-period. The worst case is that the timer will never fire, because we always convert back and forth. You have to compute the equivalent deadline only once, and carry it around. > + } > + > + tscdeadline = apic->lapic_timer.tscdeadline; > > if (atomic_read(&apic->lapic_timer.pending) || > kvm_x86_ops->set_hv_timer(apic->vcpu, tscdeadline)) { > if (apic->lapic_timer.hv_timer_in_use) > + cancel_hv_timer(apic); > } else { > apic->lapic_timer.hv_timer_in_use = true; > hrtimer_cancel(&apic->lapic_timer.timer); > > /* In case the sw timer triggered in the window */ > if (atomic_read(&apic->lapic_timer.pending)) > + cancel_hv_timer(apic); > } > trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, > apic->lapic_timer.hv_timer_in_use); > return apic->lapic_timer.hv_timer_in_use; > } > > +void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu) > +{ > + struct kvm_lapic *apic = vcpu->arch.apic; > + > + WARN_ON(!apic->lapic_timer.hv_timer_in_use); > + WARN_ON(swait_active(&vcpu->wq)); > + cancel_hv_timer(apic); > + apic_timer_expired(apic); > + > + if (apic_lvtt_period(apic)) > + start_hv_timer(apic); > +} > +EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer); > + > void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu) > { > struct kvm_lapic *apic = vcpu->arch.apic; > > WARN_ON(apic->lapic_timer.hv_timer_in_use); > > - if (apic_lvtt_tscdeadline(apic)) > - start_hv_tscdeadline(apic); > + start_hv_timer(apic); > } > EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_hv_timer); > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html