Rather than have KVM look up the host timer and fiddle with the irq-work internal details, have the powerpc/time.c code provide a function for KVM to re-arm the Linux timer code when exiting a guest. This is implementation has an improvement over existing code of marking a decrementer interrupt as soft-pending if a timer has expired, rather than setting DEC to a -ve value, which tended to cause host timers to take two interrupts (first hdec to exit the guest, then the immediate dec). Signed-off-by: Nicholas Piggin <npiggin@xxxxxxxxx> --- arch/powerpc/include/asm/time.h | 16 +++------- arch/powerpc/kernel/time.c | 52 +++++++++++++++++++++++++++------ arch/powerpc/kvm/book3s_hv.c | 7 ++--- 3 files changed, 49 insertions(+), 26 deletions(-) diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index 69b6be617772..924b2157882f 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -99,18 +99,6 @@ extern void div128_by_32(u64 dividend_high, u64 dividend_low, extern void secondary_cpu_time_init(void); extern void __init time_init(void); -#ifdef CONFIG_PPC64 -static inline unsigned long test_irq_work_pending(void) -{ - unsigned long x; - - asm volatile("lbz %0,%1(13)" - : "=r" (x) - : "i" (offsetof(struct paca_struct, irq_work_pending))); - return x; -} -#endif - DECLARE_PER_CPU(u64, decrementers_next_tb); static inline u64 timer_get_next_tb(void) @@ -118,6 +106,10 @@ static inline u64 timer_get_next_tb(void) return __this_cpu_read(decrementers_next_tb); } +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +void timer_rearm_host_dec(u64 now); +#endif + /* Convert timebase ticks to nanoseconds */ unsigned long long tb_to_ns(unsigned long long tb_ticks); diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 72d872b49167..016828b7401b 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -499,6 +499,16 @@ EXPORT_SYMBOL(profile_pc); * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable... */ #ifdef CONFIG_PPC64 +static inline unsigned long test_irq_work_pending(void) +{ + unsigned long x; + + asm volatile("lbz %0,%1(13)" + : "=r" (x) + : "i" (offsetof(struct paca_struct, irq_work_pending))); + return x; +} + static inline void set_irq_work_pending_flag(void) { asm volatile("stb %0,%1(13)" : : @@ -542,13 +552,44 @@ void arch_irq_work_raise(void) preempt_enable(); } +static void set_dec_or_work(u64 val) +{ + set_dec(val); + /* We may have raced with new irq work */ + if (unlikely(test_irq_work_pending())) + set_dec(1); +} + #else /* CONFIG_IRQ_WORK */ #define test_irq_work_pending() 0 #define clear_irq_work_pending() +static void set_dec_or_work(u64 val) +{ + set_dec(val); +} #endif /* CONFIG_IRQ_WORK */ +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +void timer_rearm_host_dec(u64 now) +{ + u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); + + WARN_ON_ONCE(!arch_irqs_disabled()); + WARN_ON_ONCE(mfmsr() & MSR_EE); + + if (now >= *next_tb) { + local_paca->irq_happened |= PACA_IRQ_DEC; + } else { + now = *next_tb - now; + if (now <= decrementer_max) + set_dec_or_work(now); + } +} +EXPORT_SYMBOL_GPL(timer_rearm_host_dec); +#endif + /* * timer_interrupt - gets called when the decrementer overflows, * with interrupts disabled. @@ -609,10 +650,7 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt) } else { now = *next_tb - now; if (now <= decrementer_max) - set_dec(now); - /* We may have raced with new irq work */ - if (test_irq_work_pending()) - set_dec(1); + set_dec_or_work(now); __this_cpu_inc(irq_stat.timer_irqs_others); } @@ -854,11 +892,7 @@ static int decrementer_set_next_event(unsigned long evt, struct clock_event_device *dev) { __this_cpu_write(decrementers_next_tb, get_tb() + evt); - set_dec(evt); - - /* We may have raced with new irq work */ - if (test_irq_work_pending()) - set_dec(1); + set_dec_or_work(evt); return 0; } diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 49a3c1e6a544..5e48a929d670 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -4048,11 +4048,8 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, vc->entry_exit_map = 0x101; vc->in_guest = 0; - next_timer = timer_get_next_tb(); - set_dec(next_timer - tb); - /* We may have raced with new irq work */ - if (test_irq_work_pending()) - set_dec(1); + timer_rearm_host_dec(tb); + mtspr(SPRN_SPRG_VDSO_WRITE, local_paca->sprg_vdso); kvmhv_load_host_pmu(); -- 2.23.0