From: Like Xu <likexu@xxxxxxxxxxx> Add kvm->arch.user_set_tsc to avoid synchronization on the first write from userspace so as not to misconstrue state restoration after live migration as an attempt from userspace to synchronize. More precisely, the problem is that the sync code doesn't differentiate between userspace initializing the TSC and userspace attempting to synchronize the TSC. Reported-by: Yong He <alexyonghe@xxxxxxxxxxx> Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217423 Suggested-by: Sean Christopherson <seanjc@xxxxxxxxxx> Suggested-by: Oliver Upton <oliver.upton@xxxxxxxxx> Original-by: Sean Christopherson <seanjc@xxxxxxxxxx> Tested-by: Like Xu <likexu@xxxxxxxxxxx> Signed-off-by: Like Xu <likexu@xxxxxxxxxxx> --- V4 -> V5 Changelog: - Making kvm_synchronize_tsc(@data) a pointer and passing NULL; (Sean) - Refine commit message in a more accurate way; (Sean) V4: https://lore.kernel.org/kvm/20230801034524.64007-1-likexu@xxxxxxxxxxx/ arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/x86.c | 25 ++++++++++++++++--------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 1a4def36d5bb..9a7dfef9d32d 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1324,6 +1324,7 @@ struct kvm_arch { int nr_vcpus_matched_tsc; u32 default_tsc_khz; + bool user_set_tsc; seqcount_raw_spinlock_t pvclock_sc; bool use_master_clock; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6c9c81e82e65..0fef6ed69cbb 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2714,8 +2714,9 @@ static void __kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 offset, u64 tsc, kvm_track_tsc_matching(vcpu); } -static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 data) +static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 *user_value) { + u64 data = user_value ? *user_value : 0; struct kvm *kvm = vcpu->kvm; u64 offset, ns, elapsed; unsigned long flags; @@ -2728,14 +2729,17 @@ static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 data) elapsed = ns - kvm->arch.last_tsc_nsec; if (vcpu->arch.virtual_tsc_khz) { + /* + * Force synchronization when creating or hotplugging a vCPU, + * i.e. when the TSC value is '0', to help keep clocks stable. + * If this is NOT a hotplug/creation case, skip synchronization + * on the first write from userspace so as not to misconstrue + * state restoration after live migration as an attempt from + * userspace to synchronize. + */ if (data == 0) { - /* - * detection of vcpu initialization -- need to sync - * with other vCPUs. This particularly helps to keep - * kvm_clock stable after CPU hotplug - */ synchronizing = true; - } else { + } else if (kvm->arch.user_set_tsc) { u64 tsc_exp = kvm->arch.last_tsc_write + nsec_to_cycles(vcpu, elapsed); u64 tsc_hz = vcpu->arch.virtual_tsc_khz * 1000LL; @@ -2749,6 +2753,9 @@ static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 data) } } + if (user_value) + kvm->arch.user_set_tsc = true; + /* * For a reliable TSC, we can match TSC offsets, and for an unstable * TSC, we add elapsed time in this computation. We could let the @@ -3777,7 +3784,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_TSC: if (msr_info->host_initiated) { - kvm_synchronize_tsc(vcpu, data); + kvm_synchronize_tsc(vcpu, &data); } else { u64 adj = kvm_compute_l1_tsc_offset(vcpu, data) - vcpu->arch.l1_tsc_offset; adjust_tsc_offset_guest(vcpu, adj); @@ -11959,7 +11966,7 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) if (mutex_lock_killable(&vcpu->mutex)) return; vcpu_load(vcpu); - kvm_synchronize_tsc(vcpu, 0); + kvm_synchronize_tsc(vcpu, NULL); vcpu_put(vcpu); /* poll control enabled by default */ base-commit: 0bb80ecc33a8fb5a682236443c1e740d5c917d1d -- 2.42.0