From: David Woodhouse <dwmw@xxxxxxxxxxxx> Leaving the PIT interrupt running can cause noticeable steal time for virtual guests. The VMM generally has a timer which toggles the IRQ input to the PIC and I/O APIC, which takes CPU time away from the guest. Make sure it's turned off if it isn't going to be used. Except on real hardware, because the less we change on real hardware the better. There be dragons. Signed-off-by: David Woodhouse <dwmw@xxxxxxxxxxxx> --- arch/x86/kernel/i8253.c | 13 +++++++++++-- drivers/clocksource/i8253.c | 30 ++++++++++++++++++++++++++---- include/linux/i8253.h | 1 + 3 files changed, 38 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c index 2b7999a1a50a..54bfbd2aa773 100644 --- a/arch/x86/kernel/i8253.c +++ b/arch/x86/kernel/i8253.c @@ -8,6 +8,7 @@ #include <linux/timex.h> #include <linux/i8253.h> +#include <asm/hypervisor.h> #include <asm/apic.h> #include <asm/hpet.h> #include <asm/time.h> @@ -39,9 +40,17 @@ static bool __init use_pit(void) bool __init pit_timer_init(void) { - if (!use_pit()) - return false; + if (!use_pit()) { + /* + * Don't just ignore the PIT. Ensure it's stopped, because + * VMMs otherwise steal CPU time just to pointlessly waggle + * the (masked) IRQ. 
+ */ if (!hypervisor_is_type(X86_HYPER_NATIVE)) + clockevent_i8253_disable(); + return false; + } clockevent_i8253_init(true); global_clock_event = &i8253_clockevent; return true; diff --git a/drivers/clocksource/i8253.c b/drivers/clocksource/i8253.c index d4350bb10b83..51aab0a74481 100644 --- a/drivers/clocksource/i8253.c +++ b/drivers/clocksource/i8253.c @@ -108,21 +108,43 @@ int __init clocksource_i8253_init(void) #endif #ifdef CONFIG_CLKEVT_I8253 -static int pit_shutdown(struct clock_event_device *evt) +void clockevent_i8253_disable(void) { - if (!clockevent_state_oneshot(evt) && !clockevent_state_periodic(evt)) - return 0; - raw_spin_lock(&i8253_lock); outb_p(0x30, PIT_MODE); + /* + * The spec is a little bit ambiguous, although it does say that + * "The actual order of the programming is quite flexible. Writing + * out of the MODE control word can be in any sequence of counter + * selection". + * + * Implementations differ, however, in whether a mode change takes + * effect immediately or whether it only occurs when the counter is + * subsequently written. The KVM in-kernel and AWS Nitro hypervisor + * implementations need the counter to be written; QEMU does not. + * + * Theoretically, in one-shot mode, writing the counter will cause + * the IRQ to trigger one last time before falling quiet. Allegedly, + * under Hyper-V it keeps firing repeatedly, thus the existence of + * the i8253_clear_counter_on_shutdown quirk to refrain from doing + * so. 
+ */ if (i8253_clear_counter_on_shutdown) { outb_p(0, PIT_CH0); outb_p(0, PIT_CH0); } raw_spin_unlock(&i8253_lock); +} + +static int pit_shutdown(struct clock_event_device *evt) +{ + if (!clockevent_state_oneshot(evt) && !clockevent_state_periodic(evt)) + return 0; + + clockevent_i8253_disable(); return 0; } diff --git a/include/linux/i8253.h b/include/linux/i8253.h index 8336b2f6f834..bf169cfef7f1 100644 --- a/include/linux/i8253.h +++ b/include/linux/i8253.h @@ -24,6 +24,7 @@ extern raw_spinlock_t i8253_lock; extern bool i8253_clear_counter_on_shutdown; extern struct clock_event_device i8253_clockevent; extern void clockevent_i8253_init(bool oneshot); +extern void clockevent_i8253_disable(void); extern void setup_pit_timer(void); -- 2.44.0
Attachment:
smime.p7s
Description: S/MIME cryptographic signature