Sean noticed that KVM_GET_CLOCK was checking kvm_arch.use_master_clock outside of the pvclock sync lock. This is problematic, as the clock value written to the user may or may not actually correspond to a stable TSC. Fix the race by populating the entire kvm_clock_data structure behind the pvclock_gtod_sync_lock. Suggested-by: Sean Christopherson <seanjc@xxxxxxxxxx> Signed-off-by: Oliver Upton <oupton@xxxxxxxxxx> --- arch/x86/kvm/x86.c | 39 ++++++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index fdc0c18339fb..2f3929bd5f58 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2787,19 +2787,20 @@ static void kvm_update_masterclock(struct kvm *kvm) kvm_end_pvclock_update(kvm); } -u64 get_kvmclock_ns(struct kvm *kvm) +static void get_kvmclock(struct kvm *kvm, struct kvm_clock_data *data) { struct kvm_arch *ka = &kvm->arch; struct pvclock_vcpu_time_info hv_clock; unsigned long flags; - u64 ret; spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags); if (!ka->use_master_clock) { spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags); - return get_kvmclock_base_ns() + ka->kvmclock_offset; + data->clock = get_kvmclock_base_ns() + ka->kvmclock_offset; + return; } + data->flags |= KVM_CLOCK_TSC_STABLE; hv_clock.tsc_timestamp = ka->master_cycle_now; hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset; spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags); @@ -2811,13 +2812,26 @@ u64 get_kvmclock_ns(struct kvm *kvm) kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL, &hv_clock.tsc_shift, &hv_clock.tsc_to_system_mul); - ret = __pvclock_read_cycles(&hv_clock, rdtsc()); - } else - ret = get_kvmclock_base_ns() + ka->kvmclock_offset; + data->clock = __pvclock_read_cycles(&hv_clock, rdtsc()); + } else { + data->clock = get_kvmclock_base_ns() + ka->kvmclock_offset; + } put_cpu(); +} - return ret; +u64 get_kvmclock_ns(struct kvm *kvm) +{ + struct kvm_clock_data data; + + /* + * Zero flags as it's accessed RMW, leave everything else uninitialized + * as clock is always written and no other fields are consumed. + */ + data.flags = 0; + + get_kvmclock(kvm, &data); + return data.clock; } static void kvm_setup_pvclock_page(struct kvm_vcpu *v, @@ -6098,11 +6112,14 @@ long kvm_arch_vm_ioctl(struct file *filp, } case KVM_GET_CLOCK: { struct kvm_clock_data user_ns; - u64 now_ns; - now_ns = get_kvmclock_ns(kvm); - user_ns.clock = now_ns; - user_ns.flags = kvm->arch.use_master_clock ? KVM_CLOCK_TSC_STABLE : 0; + /* + * Zero flags as it is accessed RMW, leave everything else + * uninitialized as clock is always written and no other fields + * are consumed. + */ + user_ns.flags = 0; + get_kvmclock(kvm, &user_ns); memset(&user_ns.pad, 0, sizeof(user_ns.pad)); r = -EFAULT; -- 2.33.0.rc1.237.g0d66db33f3-goog