Re: [RHEL 8.3 BZ 1768622 PATCH 3/3] KVM: x86: use raw clock values consistently

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Marcelo,

I'm marking this as superseded by pwid 303977

http://patchwork.lab.bos.redhat.com/patch/303977/

which was included in kernel-4.18.0-196.el8

    commit 9751522d92195bc64883c71e2bee8ed0fcbc5007
    Author: Vitaly Kuznetsov <vkuznets@xxxxxxxxxx>
    Date:   Mon Apr 20 15:20:04 2020 -0400

    [x86] kvm: x86: use raw clock values consistently

    Message-id: <20200420152004.933168-1-vkuznets@xxxxxxxxxx>
    Patchwork-id: 303977
    Patchwork-instance: patchwork
    O-Subject: [RHEL8.3 virt PATCH v2 312/614] KVM: x86: use raw clock values consistently
    Bugzilla: 1813987
    RH-Acked-by: Andrew Jones <drjones@xxxxxxxxxx>
    RH-Acked-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
    RH-Acked-by: Tony Camuso <tcamuso@xxxxxxxxxx>

Note there is a difference between yours and Vitaly's patch in the
following hunk

+@@ -1656,6 +1656,18 @@ static void update_pvclock_gtod(struct timekeeper *tk)

        write_seqcount_end(&vdata->seq);
  }
@@ -14,12 +16,12 @@
 +static s64 get_kvmclock_base_ns(void)
 +{
 +      /* Master clock not used, so we can just use CLOCK_BOOTTIME.  */
-+      return ktime_get_boottime_ns();
++      return ktime_get_boot_ns();
 +}
  #endif


This is in !CONFIG_X86_64 path, so I'm not sure how much we care about
this. Anyway Vitaly's patch corrects this, because
rhel does not have upstream commit 9285ec4c8b61d4930a575081abeba2cd4f449a74

Thank you

On Mon, May 18, 2020 at 04:15:19PM -0300, Marcelo Tosatti wrote:
> BZ: 1768622 
> Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=28587593
> commit 8171cd68806bd2fc28ef688e32fb2a3b3deb04e5
> 
> Commit 53fafdbb8b21f ("KVM: x86: switch KVMCLOCK base to monotonic raw
> clock") changed kvmclock to use tkr_raw instead of tkr_mono.  However,
> the default kvmclock_offset for the VM was still based on the monotonic
> clock and, if the raw clock drifted enough from the monotonic clock,
> this could cause a negative system_time to be written to the guest's
> struct pvclock.  RHEL5 does not like it and (if it boots fast enough to
> observe a negative time value) it hangs.
> 
> There is another thing to be careful about: getboottime64 returns the
> host boot time with tkr_mono frequency, and subtracting the tkr_raw-based
> kvmclock value will cause the wallclock to be off if tkr_raw drifts
> from tkr_mono.  To avoid this, compute the wallclock delta from the
> current time instead of being clever and using getboottime64.
> 
> Fixes: 53fafdbb8b21f ("KVM: x86: switch KVMCLOCK base to monotonic raw clock")
> Cc: stable@xxxxxxxxxxxxxxx
> Reviewed-by: Vitaly Kuznetsov <vkuznets@xxxxxxxxxx>
> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> 
> Index: kernel-rhel/arch/x86/kvm/x86.c
> ===================================================================
> --- kernel-rhel.orig/arch/x86/kvm/x86.c
> +++ kernel-rhel/arch/x86/kvm/x86.c
> @@ -1595,6 +1595,18 @@ static void update_pvclock_gtod(struct t
>  
>  	write_seqcount_end(&vdata->seq);
>  }
> +
> +static s64 get_kvmclock_base_ns(void)
> +{
> +	/* Count up from boot time, but with the frequency of the raw clock.  */
> +	return ktime_to_ns(ktime_add(ktime_get_raw(), pvclock_gtod_data.offs_boot));
> +}
> +#else
> +static s64 get_kvmclock_base_ns(void)
> +{
> +	/* Master clock not used, so we can just use CLOCK_BOOTTIME.  */
> +	return ktime_get_boottime_ns();
> +}
>  #endif
>  
>  void kvm_set_pending_timer(struct kvm_vcpu *vcpu)
> @@ -1608,7 +1620,7 @@ static void kvm_write_wall_clock(struct
>  	int version;
>  	int r;
>  	struct pvclock_wall_clock wc;
> -	struct timespec64 boot;
> +	u64 wall_nsec;
>  
>  	if (!wall_clock)
>  		return;
> @@ -1628,17 +1640,12 @@ static void kvm_write_wall_clock(struct
>  	/*
>  	 * The guest calculates current wall clock time by adding
>  	 * system time (updated by kvm_guest_time_update below) to the
> -	 * wall clock specified here.  guest system time equals host
> -	 * system time for us, thus we must fill in host boot time here.
> +	 * wall clock specified here.  We do the reverse here.
>  	 */
> -	getboottime64(&boot);
> +	wall_nsec = ktime_get_real_ns() - get_kvmclock_ns(kvm);
>  
> -	if (kvm->arch.kvmclock_offset) {
> -		struct timespec64 ts = ns_to_timespec64(kvm->arch.kvmclock_offset);
> -		boot = timespec64_sub(boot, ts);
> -	}
> -	wc.sec = (u32)boot.tv_sec; /* overflow in 2106 guest time */
> -	wc.nsec = boot.tv_nsec;
> +	wc.nsec = do_div(wall_nsec, 1000000000);
> +	wc.sec = (u32)wall_nsec; /* overflow in 2106 guest time */
>  	wc.version = version;
>  
>  	kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc));
> @@ -1886,7 +1893,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu
>  
>  	raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
>  	offset = kvm_compute_tsc_offset(vcpu, data);
> -	ns = ktime_get_boot_ns();
> +	ns = get_kvmclock_base_ns();
>  	elapsed = ns - kvm->arch.last_tsc_nsec;
>  
>  	if (vcpu->arch.virtual_tsc_khz) {
> @@ -2224,7 +2231,7 @@ u64 get_kvmclock_ns(struct kvm *kvm)
>  	spin_lock(&ka->pvclock_gtod_sync_lock);
>  	if (!ka->use_master_clock) {
>  		spin_unlock(&ka->pvclock_gtod_sync_lock);
> -		return ktime_get_boot_ns() + ka->kvmclock_offset;
> +		return get_kvmclock_base_ns() + ka->kvmclock_offset;
>  	}
>  
>  	hv_clock.tsc_timestamp = ka->master_cycle_now;
> @@ -2240,7 +2247,7 @@ u64 get_kvmclock_ns(struct kvm *kvm)
>  				   &hv_clock.tsc_to_system_mul);
>  		ret = __pvclock_read_cycles(&hv_clock, rdtsc());
>  	} else
> -		ret = ktime_get_boot_ns() + ka->kvmclock_offset;
> +		ret = get_kvmclock_base_ns() + ka->kvmclock_offset;
>  
>  	put_cpu();
>  
> @@ -2339,7 +2346,7 @@ static int kvm_guest_time_update(struct
>  	}
>  	if (!use_master_clock) {
>  		host_tsc = rdtsc();
> -		kernel_ns = ktime_get_boot_ns();
> +		kernel_ns = get_kvmclock_base_ns();
>  	}
>  
>  	tsc_timestamp = kvm_read_l1_tsc(v, host_tsc);
> @@ -2379,6 +2386,7 @@ static int kvm_guest_time_update(struct
>  	vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
>  	vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
>  	vcpu->last_guest_tsc = tsc_timestamp;
> +	WARN_ON(vcpu->hv_clock.system_time < 0);
>  
>  	/* If the host uses TSC clocksource, then it is stable */
>  	pvclock_flags = 0;
> @@ -9486,7 +9494,7 @@ int kvm_arch_init_vm(struct kvm *kvm, un
>  	mutex_init(&kvm->arch.apic_map_lock);
>  	spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
>  
> -	kvm->arch.kvmclock_offset = -ktime_get_boot_ns();
> +	kvm->arch.kvmclock_offset = -get_kvmclock_base_ns();
>  	pvclock_update_vm_gtod_copy(kvm);
>  
>  	kvm->arch.guest_can_read_msr_platform_info = true;
> 
> 

-- 
Frantisek Hrbata




[Index of Archives]     [Linux Kernel]     [Kernel Development Newbies]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite Hiking]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux