This patch implements virtual suspend time injection support for kvm guests. If this functionality is enabled and the host supports KVM_FEATURE_HOST_SUSPEND_TIME, the guest will register struct kvm_host_suspend_time through MSR to monitor how much time we spend during the host's suspension. If the duration is increased, the guest will advance CLOCK_BOOTTIME to match with the host's suspension. Signed-off-by: Hikaru Nishida <hikalium@xxxxxxxxxxxx> --- arch/x86/include/asm/kvm_para.h | 9 +++++++++ arch/x86/kernel/kvmclock.c | 25 +++++++++++++++++++++++++ include/linux/timekeeper_internal.h | 4 ++++ kernel/time/timekeeping.c | 27 +++++++++++++++++++++++++++ 4 files changed, 65 insertions(+) diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 338119852512..e552efa931a8 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -18,6 +18,15 @@ static inline bool kvm_check_and_clear_guest_paused(void) } #endif /* CONFIG_KVM_GUEST */ +#ifdef CONFIG_KVM_VIRT_SUSPEND_TIMING_GUEST +u64 kvm_get_suspend_time(void); +#else +static inline u64 kvm_get_suspend_time(void) +{ + return 0; +} +#endif /* CONFIG_KVM_VIRT_SUSPEND_TIMING_GUEST */ + #define KVM_HYPERCALL \ ALTERNATIVE("vmcall", "vmmcall", X86_FEATURE_VMMCALL) diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 1fc0962c89c0..630460beb05a 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -49,6 +49,9 @@ early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall); static struct pvclock_vsyscall_time_info hv_clock_boot[HVC_BOOT_ARRAY_SIZE] __bss_decrypted __aligned(PAGE_SIZE); +#ifdef CONFIG_KVM_VIRT_SUSPEND_TIMING_GUEST +static struct kvm_host_suspend_time suspend_time __bss_decrypted; +#endif static struct pvclock_wall_clock wall_clock __bss_decrypted; static DEFINE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu); static struct pvclock_vsyscall_time_info *hvclock_mem; @@ -164,6 +167,20 @@ static int kvm_cs_enable(struct clocksource *cs) return 0; } +#ifdef CONFIG_KVM_VIRT_SUSPEND_TIMING_GUEST +/* + * kvm_get_suspend_time_and_clear_request - This function + * checks how much suspend time is injected by the host. + * A returned value is a total time passed during the guest in a suspend + * state while this guest is running. (Not a duration of the last host + * suspend.) + */ +u64 kvm_get_suspend_time(void) +{ + return suspend_time.suspend_time_ns; +} +#endif /* CONFIG_KVM_VIRT_SUSPEND_TIMING_GUEST */ + struct clocksource kvm_clock = { .name = "kvm-clock", .read = kvm_clock_get_cycles, @@ -324,6 +341,14 @@ void __init kvmclock_init(void) return; } +#ifdef CONFIG_KVM_VIRT_SUSPEND_TIMING_GUEST + if (kvm_para_has_feature(KVM_FEATURE_HOST_SUSPEND_TIME)) { + /* Register the suspend time structure */ + wrmsrl(MSR_KVM_HOST_SUSPEND_TIME, + slow_virt_to_phys(&suspend_time) | KVM_MSR_ENABLED); + } +#endif + if (cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "kvmclock:setup_percpu", kvmclock_setup_percpu, NULL) < 0) { return; diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index 84ff2844df2a..a5fd515f0a9d 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -124,6 +124,10 @@ struct timekeeper { u32 ntp_err_mult; /* Flag used to avoid updating NTP twice with same second */ u32 skip_second_overflow; +#ifdef CONFIG_KVM_VIRT_SUSPEND_TIMING_GUEST + /* suspend_time_injected keeps the duration injected through kvm */ + u64 suspend_time_injected; +#endif #ifdef CONFIG_DEBUG_TIMEKEEPING long last_warning; /* diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 342a032ad552..6b89e0d42596 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -2114,6 +2114,29 @@ static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset, return offset; } +#ifdef CONFIG_KVM_VIRT_SUSPEND_TIMING_GUEST +/* + * timekeeping_inject_suspend_time - Inject virtual suspend time + * if it is requested by kvm host. + * This function should be called under holding timekeeper_lock and + * only from timekeeping_advance(). + */ +static void timekeeping_inject_virtual_suspend_time(struct timekeeper *tk) +{ + struct timespec64 delta; + u64 suspend_time; + + suspend_time = kvm_get_suspend_time(); + if (suspend_time <= tk->suspend_time_injected) { + /* Sufficient amount of suspend time is already injected. */ + return; + } + delta = ns_to_timespec64(suspend_time - tk->suspend_time_injected); + __timekeeping_inject_sleeptime(tk, &delta); + tk->suspend_time_injected = suspend_time; +} +#endif + /* * timekeeping_advance - Updates the timekeeper to the current time and * current NTP tick length @@ -2143,6 +2166,10 @@ static void timekeeping_advance(enum timekeeping_adv_mode mode) /* Do some additional sanity checking */ timekeeping_check_update(tk, offset); +#ifdef CONFIG_KVM_VIRT_SUSPEND_TIMING_GUEST + timekeeping_inject_virtual_suspend_time(tk); +#endif + /* * With NO_HZ we may have to accumulate many cycle_intervals * (think "ticks") worth of time at once. To do this efficiently, -- 2.31.1.498.g6c1eba8ee3d-goog