On 22/04/2018 02:53, Anthoine Bourgeois wrote: > Since the commit "8003c9ae204e: add APIC Timer periodic/oneshot mode VMX > preemption timer support", a Windows 10 guest has some erratic timer > spikes after a few hours. As the uptime of the VM grows the spikes are > larger. > > Here are the results on a 150000 times 1ms timer without any load: > Before 8003c9ae204e | After 8003c9ae204e > Max 1834us | 86000us > Mean 1100us | 1021us > Deviation 59us | 149us > Here are the results on a 150000 times 1ms timer with a cpu-z stress test: > Before 8003c9ae204e | After 8003c9ae204e > Max 32000us | 140000us > Mean 1006us | 1997us > Deviation 140us | 11095us > > The current patch partially reverts the previous commit by removing the > target timer expiration to go back to the straight hrtimer calls. The > APIC Timer periodic/oneshot mode support is kept because it is necessary > on the new Windows Spring update. > > v2: Check if the tsc deadline is already expired. Thank you Mika. > > Cc: Mika Penttilä <mika.penttila@xxxxxxxxxxxx> > Signed-off-by: Anthoine Bourgeois <anthoine.bourgeois@xxxxxxxxxxxxxxx> > --- > arch/x86/kvm/lapic.c | 57 +++++++++++++++++++++++++--------------------------- > arch/x86/kvm/lapic.h | 1 - > 2 files changed, 27 insertions(+), 31 deletions(-) > > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c > index 70dcb5548022..8b5c2a69a3b6 100644 > --- a/arch/x86/kvm/lapic.c > +++ b/arch/x86/kvm/lapic.c > @@ -1173,7 +1173,7 @@ static void apic_send_ipi(struct kvm_lapic *apic) > > static u32 apic_get_tmcct(struct kvm_lapic *apic) > { > - ktime_t remaining, now; > + ktime_t remaining; > s64 ns; > u32 tmcct; > > @@ -1184,8 +1184,7 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic) > apic->lapic_timer.period == 0) > return 0; > > - now = ktime_get(); > - remaining = ktime_sub(apic->lapic_timer.target_expiration, now); > + remaining = hrtimer_get_remaining(&apic->lapic_timer.timer); I'm confused, how can this work when the preemption timer is in use 
(vcpu->arch.apic->lapic_timer.hv_timer_in_use is true)? Paolo > if (ktime_to_ns(remaining) < 0) > remaining = 0; > > @@ -1465,32 +1464,50 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic) > > static void start_sw_period(struct kvm_lapic *apic) > { > + ktime_t now; > + > + /* lapic timer in oneshot or periodic mode */ > + now = ktime_get(); > + apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT) > + * APIC_BUS_CYCLE_NS * apic->divide_count; > + > if (!apic->lapic_timer.period) > return; > > + limit_periodic_timer_frequency(apic); > + > if (apic_lvtt_oneshot(apic) && > - ktime_after(ktime_get(), > - apic->lapic_timer.target_expiration)) { > + ktime_after(now, > + apic->lapic_timer.tscdeadline)) { > apic_timer_expired(apic); > return; > } > > hrtimer_start(&apic->lapic_timer.timer, > - apic->lapic_timer.target_expiration, > + ktime_add_ns(now, apic->lapic_timer.period), > HRTIMER_MODE_ABS_PINNED); > + > + apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016" > + PRIx64 ", " > + "timer initial count 0x%x, period %lldns, " > + "expire @ 0x%016" PRIx64 ".\n", __func__, > + APIC_BUS_CYCLE_NS, ktime_to_ns(now), > + kvm_lapic_get_reg(apic, APIC_TMICT), > + apic->lapic_timer.period, > + ktime_to_ns(ktime_add_ns(now, > + apic->lapic_timer.period))); > } > > static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_divisor) > { > - ktime_t now, remaining; > + ktime_t remaining; > u64 ns_remaining_old, ns_remaining_new; > > apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT) > * APIC_BUS_CYCLE_NS * apic->divide_count; > limit_periodic_timer_frequency(apic); > > - now = ktime_get(); > - remaining = ktime_sub(apic->lapic_timer.target_expiration, now); > + remaining = hrtimer_get_remaining(&apic->lapic_timer.timer); > if (ktime_to_ns(remaining) < 0) > remaining = 0; > > @@ -1501,15 +1518,10 @@ static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_diviso > apic->lapic_timer.tscdeadline += > 
nsec_to_cycles(apic->vcpu, ns_remaining_new) - > nsec_to_cycles(apic->vcpu, ns_remaining_old); > - apic->lapic_timer.target_expiration = ktime_add_ns(now, ns_remaining_new); > } > > static bool set_target_expiration(struct kvm_lapic *apic) > { > - ktime_t now; > - u64 tscl = rdtsc(); > - > - now = ktime_get(); > apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT) > * APIC_BUS_CYCLE_NS * apic->divide_count; > > @@ -1520,19 +1532,8 @@ static bool set_target_expiration(struct kvm_lapic *apic) > > limit_periodic_timer_frequency(apic); > > - apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016" > - PRIx64 ", " > - "timer initial count 0x%x, period %lldns, " > - "expire @ 0x%016" PRIx64 ".\n", __func__, > - APIC_BUS_CYCLE_NS, ktime_to_ns(now), > - kvm_lapic_get_reg(apic, APIC_TMICT), > - apic->lapic_timer.period, > - ktime_to_ns(ktime_add_ns(now, > - apic->lapic_timer.period))); > - > - apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) + > + apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, rdtsc()) + > nsec_to_cycles(apic->vcpu, apic->lapic_timer.period); > - apic->lapic_timer.target_expiration = ktime_add_ns(now, apic->lapic_timer.period); > > return true; > } > @@ -1541,9 +1542,6 @@ static void advance_periodic_target_expiration(struct kvm_lapic *apic) > { > apic->lapic_timer.tscdeadline += > nsec_to_cycles(apic->vcpu, apic->lapic_timer.period); > - apic->lapic_timer.target_expiration = > - ktime_add_ns(apic->lapic_timer.target_expiration, > - apic->lapic_timer.period); > } > > bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu) > @@ -2216,7 +2214,6 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu) > apic->lapic_timer.tscdeadline = 0; > if (apic_lvtt_oneshot(apic)) { > apic->lapic_timer.tscdeadline = 0; > - apic->lapic_timer.target_expiration = 0; > } > atomic_set(&apic->lapic_timer.pending, 0); > } > diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h > index edce055e9fd7..56823b159e9b 100644 > --- 
a/arch/x86/kvm/lapic.h > +++ b/arch/x86/kvm/lapic.h > @@ -19,7 +19,6 @@ > struct kvm_timer { > struct hrtimer timer; > s64 period; /* unit: ns */ > - ktime_t target_expiration; > u32 timer_mode; > u32 timer_mode_mask; > u64 tscdeadline; >