Before kernel commit 78db6a5037965429c04d708281f35a6e5562d31b, kvm_guest_time_update() would use vcpu->virtual_tsc_khz to calculate the tsc_shift value in the vcpu's pvclock structure written to guest memory. For those kernels, if vcpu->virtual_tsc_khz != tsc_khz (which can be the case when guest state is restored via migration, or if the tsc-khz option is passed to QEMU), and TSC scaling is not enabled (which happens if the difference between the frequency requested via KVM_SET_TSC_KHZ and the host TSC KHZ is smaller than 250ppm), then there can be a difference between what KVM_GET_CLOCK would return and what the guest reads as the kvmclock value. The effect is that the guest sees a jump in the kvmclock value (either forwards or backwards) in such a case. To fix incoming migration from pre-78db6a5037965 hosts, read the kvmclock value from guest memory, unless the KVM_CLOCK_CORRECT_TSC_SHIFT bit indicates that the value retrieved by KVM_GET_CLOCK on the source is safe to use. Signed-off-by: Marcelo Tosatti <mtosatti@xxxxxxxxxx> Index: qemu/hw/i386/kvm/clock.c =================================================================== --- qemu.orig/hw/i386/kvm/clock.c +++ qemu/hw/i386/kvm/clock.c @@ -50,6 +50,16 @@ struct KVMClockState { /* whether the 'clock' value was obtained in a host with * reliable KVM_GET_CLOCK */ bool clock_is_reliable; + + /* whether machine type supports correct_tsc_shift */ + bool mach_use_correct_tsc_shift; + + /* + * whether the 'clock' value was obtained in a host + * that computes correct tsc_shift field (the one + * written to guest memory) + */ + bool clock_correct_tsc_shift; }; struct pvclock_vcpu_time_info { @@ -150,6 +160,8 @@ static void kvm_update_clock(KVMClockSta * read from memory */ s->clock_is_reliable = kvm_has_adjust_clock_stable(); + + s->clock_correct_tsc_shift = kvm_has_correct_tsc_shift(); } static void do_kvmclock_ctrl(CPUState *cpu, run_on_cpu_data data) @@ -176,7 +188,7 @@ static void kvmclock_vm_state_change(voi * If the host where 
s->clock was read did not support reliable * KVM_GET_CLOCK, read kvmclock value from memory. */ - if (!s->clock_is_reliable) { + if (!s->clock_is_reliable || !s->clock_correct_tsc_shift) { uint64_t pvclock_via_mem = kvmclock_current_nsec(s); /* We can't rely on the saved clock value, just discard it */ if (pvclock_via_mem) { @@ -252,14 +264,40 @@ static const VMStateDescription kvmclock }; /* + * Sending clock_correct_tsc_shift=true means that the destination + * can use VMSTATE_UINT64(clock, KVMClockState) value, + * instead of reading from guest memory. + */ +static bool kvmclock_clock_correct_tsc_shift_needed(void *opaque) +{ + KVMClockState *s = opaque; + + return s->mach_use_correct_tsc_shift; +} + +static const VMStateDescription kvmclock_correct_tsc_shift = { + .name = "kvmclock/clock_correct_tsc_shift", + .version_id = 1, + .minimum_version_id = 1, + .needed = kvmclock_clock_correct_tsc_shift_needed, + .fields = (VMStateField[]) { + VMSTATE_BOOL(clock_correct_tsc_shift, KVMClockState), + VMSTATE_END_OF_LIST() + } +}; + +/* * When migrating, assume the source has an unreliable - * KVM_GET_CLOCK unless told otherwise. + * KVM_GET_CLOCK (and computes tsc shift + * in guest memory using vcpu->virtual_tsc_khz), + * unless told otherwise. 
*/ static int kvmclock_pre_load(void *opaque) { KVMClockState *s = opaque; s->clock_is_reliable = false; + s->clock_correct_tsc_shift = false; return 0; } @@ -301,6 +339,7 @@ static const VMStateDescription kvmclock }, .subsections = (const VMStateDescription * []) { &kvmclock_reliable_get_clock, + &kvmclock_correct_tsc_shift, NULL } }; @@ -308,6 +347,8 @@ static const VMStateDescription kvmclock static Property kvmclock_properties[] = { DEFINE_PROP_BOOL("x-mach-use-reliable-get-clock", KVMClockState, mach_use_reliable_get_clock, true), + DEFINE_PROP_BOOL("x-mach-use-correct-tsc-shift", KVMClockState, + mach_use_correct_tsc_shift, true), DEFINE_PROP_END_OF_LIST(), }; Index: qemu/target/i386/kvm/kvm.c =================================================================== --- qemu.orig/target/i386/kvm/kvm.c +++ qemu/target/i386/kvm/kvm.c @@ -164,6 +164,13 @@ bool kvm_has_adjust_clock_stable(void) return (ret & KVM_CLOCK_TSC_STABLE); } +bool kvm_has_correct_tsc_shift(void) +{ + int ret = kvm_check_extension(kvm_state, KVM_CAP_ADJUST_CLOCK); + + return ret & KVM_CLOCK_CORRECT_TSC_SHIFT; +} + bool kvm_has_adjust_clock(void) { return kvm_check_extension(kvm_state, KVM_CAP_ADJUST_CLOCK); Index: qemu/target/i386/kvm/kvm_i386.h =================================================================== --- qemu.orig/target/i386/kvm/kvm_i386.h +++ qemu/target/i386/kvm/kvm_i386.h @@ -35,6 +35,7 @@ bool kvm_has_smm(void); bool kvm_has_adjust_clock(void); bool kvm_has_adjust_clock_stable(void); +bool kvm_has_correct_tsc_shift(void); bool kvm_has_exception_payload(void); void kvm_synchronize_all_tsc(void); void kvm_arch_reset_vcpu(X86CPU *cs);