Hi Marcelo, I'm marking this as superseded by pwid 303977 http://patchwork.lab.bos.redhat.com/patch/303977/ which was included in kernel-4.18.0-196.el8 commit 9751522d92195bc64883c71e2bee8ed0fcbc5007 Author: Vitaly Kuznetsov <vkuznets@xxxxxxxxxx> Date: Mon Apr 20 15:20:04 2020 -0400 [x86] kvm: x86: use raw clock values consistently Message-id: <20200420152004.933168-1-vkuznets@xxxxxxxxxx> Patchwork-id: 303977 Patchwork-instance: patchwork O-Subject: [RHEL8.3 virt PATCH v2 312/614] KVM: x86: use raw clock values consistently Bugzilla: 1813987 RH-Acked-by: Andrew Jones <drjones@xxxxxxxxxx> RH-Acked-by: Paolo Bonzini <pbonzini@xxxxxxxxxx> RH-Acked-by: Tony Camuso <tcamuso@xxxxxxxxxx> Note there is a difference between yours and Vitaly's patch in the following hunk +@@ -1656,6 +1656,18 @@ static void update_pvclock_gtod(struct timekeeper *tk) write_seqcount_end(&vdata->seq); } @@ -14,12 +16,12 @@ +static s64 get_kvmclock_base_ns(void) +{ + /* Master clock not used, so we can just use CLOCK_BOOTTIME. */ -+ return ktime_get_boottime_ns(); ++ return ktime_get_boot_ns(); +} #endif This is in !CONFIG_X86_64 path, so I'm not sure how much we care about this. Anyway Vitaly's patch corrects this, because rhel does not have upstream commit 9285ec4c8b61d4930a575081abeba2cd4f449a74 Thank you On Mon, May 18, 2020 at 04:15:19PM -0300, Marcelo Tosatti wrote: > BZ: 1768622 > Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=28587593 > commit 8171cd68806bd2fc28ef688e32fb2a3b3deb04e5 > > Commit 53fafdbb8b21f ("KVM: x86: switch KVMCLOCK base to monotonic raw > clock") changed kvmclock to use tkr_raw instead of tkr_mono. However, > the default kvmclock_offset for the VM was still based on the monotonic > clock and, if the raw clock drifted enough from the monotonic clock, > this could cause a negative system_time to be written to the guest's > struct pvclock. RHEL5 does not like it and (if it boots fast enough to > observe a negative time value) it hangs. > > There is another thing to be careful about: getboottime64 returns the > host boot time with tkr_mono frequency, and subtracting the tkr_raw-based > kvmclock value will cause the wallclock to be off if tkr_raw drifts > from tkr_mono. To avoid this, compute the wallclock delta from the > current time instead of being clever and using getboottime64. > > Fixes: 53fafdbb8b21f ("KVM: x86: switch KVMCLOCK base to monotonic raw clock") > Cc: stable@xxxxxxxxxxxxxxx > Reviewed-by: Vitaly Kuznetsov <vkuznets@xxxxxxxxxx> > Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx> > > Index: kernel-rhel/arch/x86/kvm/x86.c > =================================================================== > --- kernel-rhel.orig/arch/x86/kvm/x86.c > +++ kernel-rhel/arch/x86/kvm/x86.c > @@ -1595,6 +1595,18 @@ static void update_pvclock_gtod(struct t > > write_seqcount_end(&vdata->seq); > } > + > +static s64 get_kvmclock_base_ns(void) > +{ > + /* Count up from boot time, but with the frequency of the raw clock. */ > + return ktime_to_ns(ktime_add(ktime_get_raw(), pvclock_gtod_data.offs_boot)); > +} > +#else > +static s64 get_kvmclock_base_ns(void) > +{ > + /* Master clock not used, so we can just use CLOCK_BOOTTIME. */ > + return ktime_get_boottime_ns(); > +} > #endif > > void kvm_set_pending_timer(struct kvm_vcpu *vcpu) > @@ -1608,7 +1620,7 @@ static void kvm_write_wall_clock(struct > int version; > int r; > struct pvclock_wall_clock wc; > - struct timespec64 boot; > + u64 wall_nsec; > > if (!wall_clock) > return; > @@ -1628,17 +1640,12 @@ static void kvm_write_wall_clock(struct > /* > * The guest calculates current wall clock time by adding > * system time (updated by kvm_guest_time_update below) to the > - * wall clock specified here. guest system time equals host > - * system time for us, thus we must fill in host boot time here. > + * wall clock specified here. We do the reverse here. > */ > - getboottime64(&boot); > + wall_nsec = ktime_get_real_ns() - get_kvmclock_ns(kvm); > > - if (kvm->arch.kvmclock_offset) { > - struct timespec64 ts = ns_to_timespec64(kvm->arch.kvmclock_offset); > - boot = timespec64_sub(boot, ts); > - } > - wc.sec = (u32)boot.tv_sec; /* overflow in 2106 guest time */ > - wc.nsec = boot.tv_nsec; > + wc.nsec = do_div(wall_nsec, 1000000000); > + wc.sec = (u32)wall_nsec; /* overflow in 2106 guest time */ > wc.version = version; > > kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc)); > @@ -1886,7 +1893,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu > > raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); > offset = kvm_compute_tsc_offset(vcpu, data); > - ns = ktime_get_boot_ns(); > + ns = get_kvmclock_base_ns(); > elapsed = ns - kvm->arch.last_tsc_nsec; > > if (vcpu->arch.virtual_tsc_khz) { > @@ -2224,7 +2231,7 @@ u64 get_kvmclock_ns(struct kvm *kvm) > spin_lock(&ka->pvclock_gtod_sync_lock); > if (!ka->use_master_clock) { > spin_unlock(&ka->pvclock_gtod_sync_lock); > - return ktime_get_boot_ns() + ka->kvmclock_offset; > + return get_kvmclock_base_ns() + ka->kvmclock_offset; > } > > hv_clock.tsc_timestamp = ka->master_cycle_now; > @@ -2240,7 +2247,7 @@ u64 get_kvmclock_ns(struct kvm *kvm) > &hv_clock.tsc_to_system_mul); > ret = __pvclock_read_cycles(&hv_clock, rdtsc()); > } else > - ret = ktime_get_boot_ns() + ka->kvmclock_offset; > + ret = get_kvmclock_base_ns() + ka->kvmclock_offset; > > put_cpu(); > > @@ -2339,7 +2346,7 @@ static int kvm_guest_time_update(struct > } > if (!use_master_clock) { > host_tsc = rdtsc(); > - kernel_ns = ktime_get_boot_ns(); > + kernel_ns = get_kvmclock_base_ns(); > } > > tsc_timestamp = kvm_read_l1_tsc(v, host_tsc); > @@ -2379,6 +2386,7 @@ static int kvm_guest_time_update(struct > vcpu->hv_clock.tsc_timestamp = tsc_timestamp; > vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset; > vcpu->last_guest_tsc = tsc_timestamp; > + WARN_ON(vcpu->hv_clock.system_time < 0); > > /* If the host uses TSC clocksource, then it is stable */ > pvclock_flags = 0; > @@ -9486,7 +9494,7 @@ int kvm_arch_init_vm(struct kvm *kvm, un > mutex_init(&kvm->arch.apic_map_lock); > spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock); > > - kvm->arch.kvmclock_offset = -ktime_get_boot_ns(); > + kvm->arch.kvmclock_offset = -get_kvmclock_base_ns(); > pvclock_update_vm_gtod_copy(kvm); > > kvm->arch.guest_can_read_msr_platform_info = true; > > -- Frantisek Hrbata