Since commit "8003c9ae204e: add APIC Timer periodic/oneshot mode VMX preemption timer support", a Windows 10 guest shows erratic timer spikes after a few hours. As the uptime of the VM grows, the spikes get larger.

Here are the results for 150000 iterations of a 1ms timer without any load:

              | Before 8003c9ae204e | After 8003c9ae204e
    Max       | 1834us              | 86000us
    Mean      | 1100us              | 1021us
    Deviation | 59us                | 149us

Here are the results for 150000 iterations of a 1ms timer with a cpu-z stress test:

              | Before 8003c9ae204e | After 8003c9ae204e
    Max       | 32000us             | 140000us
    Mean      | 1006us              | 1997us
    Deviation | 140us               | 11095us

This patch partially reverts that commit by removing the timer's target expiration tracking (target_expiration) and going back to straight hrtimer calls. The APIC Timer periodic/oneshot mode support is kept because it is necessary for the new Windows Spring update.

v2: Check if the tsc deadline is already expired. Thank you Mika.

Cc: Mika Penttilä <mika.penttila@xxxxxxxxxxxx>
Signed-off-by: Anthoine Bourgeois <anthoine.bourgeois@xxxxxxxxxxxxxxx>
--- arch/x86/kvm/lapic.c | 57 +++++++++++++++++++++++++--------------------------- arch/x86/kvm/lapic.h | 1 - 2 files changed, 27 insertions(+), 31 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 70dcb5548022..8b5c2a69a3b6 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1173,7 +1173,7 @@ static void apic_send_ipi(struct kvm_lapic *apic) static u32 apic_get_tmcct(struct kvm_lapic *apic) { - ktime_t remaining, now; + ktime_t remaining; s64 ns; u32 tmcct; @@ -1184,8 +1184,7 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic) apic->lapic_timer.period == 0) return 0; - now = ktime_get(); - remaining = ktime_sub(apic->lapic_timer.target_expiration, now); + remaining = hrtimer_get_remaining(&apic->lapic_timer.timer); if (ktime_to_ns(remaining) < 0) remaining = 0; @@ -1465,32 +1464,50 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic) static void start_sw_period(struct kvm_lapic *apic) { + ktime_t now; + + /* lapic timer in oneshot or 
periodic mode */ + now = ktime_get(); + apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT) + * APIC_BUS_CYCLE_NS * apic->divide_count; + if (!apic->lapic_timer.period) return; + limit_periodic_timer_frequency(apic); + if (apic_lvtt_oneshot(apic) && - ktime_after(ktime_get(), - apic->lapic_timer.target_expiration)) { + ktime_after(now, + apic->lapic_timer.tscdeadline)) { apic_timer_expired(apic); return; } hrtimer_start(&apic->lapic_timer.timer, - apic->lapic_timer.target_expiration, + ktime_add_ns(now, apic->lapic_timer.period), HRTIMER_MODE_ABS_PINNED); + + apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016" + PRIx64 ", " + "timer initial count 0x%x, period %lldns, " + "expire @ 0x%016" PRIx64 ".\n", __func__, + APIC_BUS_CYCLE_NS, ktime_to_ns(now), + kvm_lapic_get_reg(apic, APIC_TMICT), + apic->lapic_timer.period, + ktime_to_ns(ktime_add_ns(now, + apic->lapic_timer.period))); } static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_divisor) { - ktime_t now, remaining; + ktime_t remaining; u64 ns_remaining_old, ns_remaining_new; apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT) * APIC_BUS_CYCLE_NS * apic->divide_count; limit_periodic_timer_frequency(apic); - now = ktime_get(); - remaining = ktime_sub(apic->lapic_timer.target_expiration, now); + remaining = hrtimer_get_remaining(&apic->lapic_timer.timer); if (ktime_to_ns(remaining) < 0) remaining = 0; @@ -1501,15 +1518,10 @@ static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_diviso apic->lapic_timer.tscdeadline += nsec_to_cycles(apic->vcpu, ns_remaining_new) - nsec_to_cycles(apic->vcpu, ns_remaining_old); - apic->lapic_timer.target_expiration = ktime_add_ns(now, ns_remaining_new); } static bool set_target_expiration(struct kvm_lapic *apic) { - ktime_t now; - u64 tscl = rdtsc(); - - now = ktime_get(); apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT) * APIC_BUS_CYCLE_NS * apic->divide_count; @@ -1520,19 +1532,8 @@ 
static bool set_target_expiration(struct kvm_lapic *apic) limit_periodic_timer_frequency(apic); - apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016" - PRIx64 ", " - "timer initial count 0x%x, period %lldns, " - "expire @ 0x%016" PRIx64 ".\n", __func__, - APIC_BUS_CYCLE_NS, ktime_to_ns(now), - kvm_lapic_get_reg(apic, APIC_TMICT), - apic->lapic_timer.period, - ktime_to_ns(ktime_add_ns(now, - apic->lapic_timer.period))); - - apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) + + apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, rdtsc()) + nsec_to_cycles(apic->vcpu, apic->lapic_timer.period); - apic->lapic_timer.target_expiration = ktime_add_ns(now, apic->lapic_timer.period); return true; } @@ -1541,9 +1542,6 @@ static void advance_periodic_target_expiration(struct kvm_lapic *apic) { apic->lapic_timer.tscdeadline += nsec_to_cycles(apic->vcpu, apic->lapic_timer.period); - apic->lapic_timer.target_expiration = - ktime_add_ns(apic->lapic_timer.target_expiration, - apic->lapic_timer.period); } bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu) @@ -2216,7 +2214,6 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu) apic->lapic_timer.tscdeadline = 0; if (apic_lvtt_oneshot(apic)) { apic->lapic_timer.tscdeadline = 0; - apic->lapic_timer.target_expiration = 0; } atomic_set(&apic->lapic_timer.pending, 0); } diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index edce055e9fd7..56823b159e9b 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -19,7 +19,6 @@ struct kvm_timer { struct hrtimer timer; s64 period; /* unit: ns */ - ktime_t target_expiration; u32 timer_mode; u32 timer_mode_mask; u64 tscdeadline; -- 2.11.0