On Thu, Jan 23, 2025, David Woodhouse wrote: > On Thu, 2025-01-23 at 19:02 +0000, Fred Griffoul wrote: > > > +static inline void kvm_xen_may_update_tsc_info(struct kvm_vcpu *vcpu, > > + u32 function, u32 index, > > + u32 *eax, u32 *ecx, u32 *edx) > > Should this be called kvm_xen_maybe_update_tsc_info() ? > > Is it worth adding if (static_branch_unlikely(&kvm_xen_enabled.key))? Or add a helper? Especially if we end up processing KVM_REQ_CLOCK_UPDATE. static inline bool kvm_xen_is_tsc_leaf(struct kvm_vcpu *vcpu, u32 function) { return static_branch_unlikely(&kvm_xen_enabled.key) && vcpu->arch.xen.cpuid.base && function < vcpu->arch.xen.cpuid.limit; function == (vcpu->arch.xen.cpuid.base | XEN_CPUID_LEAF(3)); } > > > +{ > > + u32 base = vcpu->arch.xen.cpuid.base; > > + > > + if (base && (function == (base | XEN_CPUID_LEAF(3)))) { Pretty sure cpuid.limit needs to be checked, e.g. to avoid a false positive in the unlikely scenario that userspace advertised a lower limit but still filled the CPUID entry. > > + if (index == 1) { > > + *ecx = vcpu->arch.hv_clock.tsc_to_system_mul; > > + *edx = vcpu->arch.hv_clock.tsc_shift; > > Are these fields in vcpu->arch.hv_clock definitely going to be set? Set, yes. Up-to-date, no. If there is a pending KVM_REQ_CLOCK_UPDATE, e.g. due to frequency change, KVM could emulate CPUID before processing the request if the CPUID VM-Exit occurred before the request was made. > If so, can we have a comment to that effect? And perhaps a warning to > assert the truth of that claim? > > Before this patch, if the hv_clock isn't yet set then the guest would > see the original content of the leaves as set by userspace? In theory, yes, but in practice that can't happen because KVM always pends a KVM_REQ_CLOCK_UPDATE before entering the guest (it's stupidly hard to see). On the first kvm_arch_vcpu_load(), vcpu->cpu will be -1, which results in KVM_REQ_GLOBAL_CLOCK_UPDATE being pending. if (unlikely(vcpu->cpu != cpu) || kvm_check_tsc_unstable()) { ... if (!vcpu->kvm->arch.use_master_clock || vcpu->cpu == -1) kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu); } That in turn triggers a KVM_REQ_CLOCK_UPDATE. if (kvm_check_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu)) kvm_gen_kvmclock_update(vcpu); static void kvm_gen_kvmclock_update(struct kvm_vcpu *v) { struct kvm *kvm = v->kvm; kvm_make_request(KVM_REQ_CLOCK_UPDATE, v); schedule_delayed_work(&kvm->arch.kvmclock_update_work, KVMCLOCK_UPDATE_DELAY); } And in the extremely unlikely failure path, which I assume handles the case where TSC calibration hasn't completed, KVM requests another KVM_REQ_CLOCK_UPDATE and aborts VM-Enter. So AFAICT, it's impossible to trigger CPUID emulation without first stuffing Xen CPUID. /* Keep irq disabled to prevent changes to the clock */ local_irq_save(flags); tgt_tsc_khz = get_cpu_tsc_khz(); if (unlikely(tgt_tsc_khz == 0)) { local_irq_restore(flags); kvm_make_request(KVM_REQ_CLOCK_UPDATE, v); return 1; } > Now it gets zeroes if that happens? Somewhat of a tangent, if userspace is providing non-zero values, commit f422f853af03 ("KVM: x86/xen: update Xen CPUID Leaf 4 (tsc info) sub-leaves, if present") would have broken userspace. QEMU doesn't appear to stuff non-zero values and no one has complained, so I think we escaped this time. Jumping back to the code, if we add kvm_xen_is_tsc_leaf(), I would be a-ok with handling the CPUID manipulations in kvm_cpuid(). I'd probably even prefer it, because overall I think bleeding a few Xen details into common code is worth making the flow easier to follow. Putting it all together, something like this? Compile tested only. --- arch/x86/kvm/cpuid.c | 16 ++++++++++++++++ arch/x86/kvm/x86.c | 3 +-- arch/x86/kvm/x86.h | 1 + arch/x86/kvm/xen.c | 23 ----------------------- arch/x86/kvm/xen.h | 13 +++++++++++-- 5 files changed, 29 insertions(+), 27 deletions(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index edef30359c19..689882326618 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -2005,6 +2005,22 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, } else if (function == 0x80000007) { if (kvm_hv_invtsc_suppressed(vcpu)) *edx &= ~feature_bit(CONSTANT_TSC); + } else if (IS_ENABLED(CONFIG_KVM_XEN) && + kvm_xen_is_tsc_leaf(vcpu, function)) { + /* + * Update guest TSC frequency information is necessary. + * Ignore failures, there is no sane value that can be + * provided if KVM can't get the TSC frequency. + */ + if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) + kvm_guest_time_update(vcpu); + + if (index == 1) { + *ecx = vcpu->arch.hv_clock.tsc_to_system_mul; + *edx = vcpu->arch.hv_clock.tsc_shift; + } else if (index == 2) { + *eax = vcpu->arch.hw_tsc_khz; + } } } else { *eax = *ebx = *ecx = *edx = 0; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f61d71783d07..817a7e522935 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3173,7 +3173,7 @@ static void kvm_setup_guest_pvclock(struct kvm_vcpu *v, trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock); } -static int kvm_guest_time_update(struct kvm_vcpu *v) +int kvm_guest_time_update(struct kvm_vcpu *v) { unsigned long flags, tgt_tsc_khz; unsigned seq; @@ -3256,7 +3256,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) &vcpu->hv_clock.tsc_shift, &vcpu->hv_clock.tsc_to_system_mul); vcpu->hw_tsc_khz = tgt_tsc_khz; - kvm_xen_update_tsc_info(v); } vcpu->hv_clock.tsc_timestamp = tsc_timestamp; diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 7a87c5fc57f1..5fdf32ba9406 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -362,6 +362,7 @@ void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip); u64 get_kvmclock_ns(struct kvm *kvm); uint64_t kvm_get_wall_clock_epoch(struct kvm *kvm); bool kvm_get_monotonic_and_clockread(s64 *kernel_ns, u64 *tsc_timestamp); +int kvm_guest_time_update(struct kvm_vcpu *v); int kvm_read_guest_virt(struct kvm_vcpu *vcpu, gva_t addr, void *val, unsigned int bytes, diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index a909b817b9c0..ed5c2f088361 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -2247,29 +2247,6 @@ void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu) del_timer_sync(&vcpu->arch.xen.poll_timer); } -void kvm_xen_update_tsc_info(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid_entry2 *entry; - u32 function; - - if (!vcpu->arch.xen.cpuid.base) - return; - - function = vcpu->arch.xen.cpuid.base | XEN_CPUID_LEAF(3); - if (function > vcpu->arch.xen.cpuid.limit) - return; - - entry = kvm_find_cpuid_entry_index(vcpu, function, 1); - if (entry) { - entry->ecx = vcpu->arch.hv_clock.tsc_to_system_mul; - entry->edx = vcpu->arch.hv_clock.tsc_shift; - } - - entry = kvm_find_cpuid_entry_index(vcpu, function, 2); - if (entry) - entry->eax = vcpu->arch.hw_tsc_khz; -} - void kvm_xen_init_vm(struct kvm *kvm) { mutex_init(&kvm->arch.xen.xen_lock); diff --git a/arch/x86/kvm/xen.h b/arch/x86/kvm/xen.h index f5841d9000ae..5ee7f3f1b45f 100644 --- a/arch/x86/kvm/xen.h +++ b/arch/x86/kvm/xen.h @@ -9,6 +9,7 @@ #ifndef __ARCH_X86_KVM_XEN_H__ #define __ARCH_X86_KVM_XEN_H__ +#include <asm/xen/cpuid.h> #include <asm/xen/hypervisor.h> #ifdef CONFIG_KVM_XEN @@ -35,7 +36,6 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe, int kvm_xen_setup_evtchn(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue); -void kvm_xen_update_tsc_info(struct kvm_vcpu *vcpu); static inline void kvm_xen_sw_enable_lapic(struct kvm_vcpu *vcpu) { @@ -50,6 +50,14 @@ static inline void kvm_xen_sw_enable_lapic(struct kvm_vcpu *vcpu) kvm_xen_inject_vcpu_vector(vcpu); } +static inline bool kvm_xen_is_tsc_leaf(struct kvm_vcpu *vcpu, u32 function) +{ + return static_branch_unlikely(&kvm_xen_enabled.key) && + vcpu->arch.xen.cpuid.base && + function < vcpu->arch.xen.cpuid.limit && + function == (vcpu->arch.xen.cpuid.base | XEN_CPUID_LEAF(3)); +} + static inline bool kvm_xen_msr_enabled(struct kvm *kvm) { return static_branch_unlikely(&kvm_xen_enabled.key) && @@ -157,8 +165,9 @@ static inline bool kvm_xen_timer_enabled(struct kvm_vcpu *vcpu) return false; } -static inline void kvm_xen_update_tsc_info(struct kvm_vcpu *vcpu) +static inline bool kvm_xen_is_tsc_leaf(struct kvm_vcpu *vcpu, u32 function) { + return false; } #endif base-commit: 84be94b5b6490e29a6f386cec90f8d5c6d14f0df --