From: Sean Christopherson <seanjc@xxxxxxxxxx> Sent: Wednesday, February 26, 2025 6:19 PM > > Add a "tsc_properties" set of flags and use it to annotate whether the > TSC operates at a known and/or reliable frequency when registering a > paravirtual TSC calibration routine. Currently, each PV flow manually > sets the associated feature flags, but often in haphazard fashion that > makes it difficult for unfamiliar readers to see the properties of the > TSC when running under a particular hypervisor. > > The other, bigger issue with manually setting the feature flags is that > it decouples the flags from the calibration routine. E.g. in theory, PV > code could mark the TSC as having a known frequency, but then have its > PV calibration discarded in favor of a method that doesn't use that known > frequency. Passing the TSC properties along with the calibration routine > will allow adding sanity checks to guard against replacing a "better" > calibration routine with a "worse" routine. > > As a bonus, the flags also give developers working on new PV code a heads > up that they should at least mark the TSC as having a known frequency. > > Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx> > --- > arch/x86/coco/sev/core.c | 6 ++---- > arch/x86/coco/tdx/tdx.c | 7 ++----- > arch/x86/include/asm/tsc.h | 8 +++++++- > arch/x86/kernel/cpu/acrn.c | 4 ++-- > arch/x86/kernel/cpu/mshyperv.c | 10 +++++++--- > arch/x86/kernel/cpu/vmware.c | 7 ++++--- > arch/x86/kernel/jailhouse.c | 4 ++-- > arch/x86/kernel/kvmclock.c | 4 ++-- > arch/x86/kernel/tsc.c | 8 +++++++- > arch/x86/xen/time.c | 4 ++-- > 10 files changed, 37 insertions(+), 25 deletions(-) > > diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c > index dab386f782ce..29dd50552715 100644 > --- a/arch/x86/coco/sev/core.c > +++ b/arch/x86/coco/sev/core.c > @@ -3284,12 +3284,10 @@ void __init snp_secure_tsc_init(void) > { > unsigned long long tsc_freq_mhz; > > - setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ); > - setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE); > - > rdmsrl(MSR_AMD64_GUEST_TSC_FREQ, tsc_freq_mhz); > snp_tsc_freq_khz = (unsigned long)(tsc_freq_mhz * 1000); > > tsc_register_calibration_routines(securetsc_get_tsc_khz, > - securetsc_get_tsc_khz); > + securetsc_get_tsc_khz, > + TSC_FREQ_KNOWN_AND_RELIABLE); > } > diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c > index 42cdaa98dc5e..ca31560d0dd3 100644 > --- a/arch/x86/coco/tdx/tdx.c > +++ b/arch/x86/coco/tdx/tdx.c > @@ -1135,14 +1135,11 @@ static unsigned long tdx_get_tsc_khz(void) > > void __init tdx_tsc_init(void) > { > - /* TSC is the only reliable clock in TDX guest */ > - setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE); > - setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ); > - > /* > * Override the PV calibration routines (if set) with more trustworthy > * CPUID-based calibration. The TDX module emulates CPUID, whereas any > * PV information is provided by the hypervisor. > */ > - tsc_register_calibration_routines(tdx_get_tsc_khz, NULL); > + tsc_register_calibration_routines(tdx_get_tsc_khz, NULL, > + TSC_FREQ_KNOWN_AND_RELIABLE); > } > diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h > index 9318c74e8d13..360f47610258 100644 > --- a/arch/x86/include/asm/tsc.h > +++ b/arch/x86/include/asm/tsc.h > @@ -41,8 +41,14 @@ extern int cpuid_get_cpu_freq(unsigned int *cpu_khz); > extern void tsc_early_init(void); > extern void tsc_init(void); > #if defined(CONFIG_HYPERVISOR_GUEST) || defined(CONFIG_AMD_MEM_ENCRYPT) > +enum tsc_properties { > + TSC_FREQUENCY_KNOWN = BIT(0), > + TSC_RELIABLE = BIT(1), > + TSC_FREQ_KNOWN_AND_RELIABLE = TSC_FREQUENCY_KNOWN | TSC_RELIABLE, > +}; > extern void tsc_register_calibration_routines(unsigned long (*calibrate_tsc)(void), > - unsigned long (*calibrate_cpu)(void)); > + unsigned long (*calibrate_cpu)(void), > + enum tsc_properties properties); > #endif > extern void mark_tsc_unstable(char *reason); > extern int unsynchronized_tsc(void); > diff --git a/arch/x86/kernel/cpu/acrn.c b/arch/x86/kernel/cpu/acrn.c > index 2da3de4d470e..4f2f4f7ec334 100644 > --- a/arch/x86/kernel/cpu/acrn.c > +++ b/arch/x86/kernel/cpu/acrn.c > @@ -29,9 +29,9 @@ static void __init acrn_init_platform(void) > /* Install system interrupt handler for ACRN hypervisor callback */ > sysvec_install(HYPERVISOR_CALLBACK_VECTOR, sysvec_acrn_hv_callback); > > - setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ); > tsc_register_calibration_routines(acrn_get_tsc_khz, > - acrn_get_tsc_khz); > + acrn_get_tsc_khz, > + TSC_FREQUENCY_KNOWN); > } > > static bool acrn_x2apic_available(void) > diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c > index 174f6a71c899..445ac3adfebc 100644 > --- a/arch/x86/kernel/cpu/mshyperv.c > +++ b/arch/x86/kernel/cpu/mshyperv.c > @@ -421,8 +421,13 @@ static void __init ms_hyperv_init_platform(void) > > if (ms_hyperv.features & HV_ACCESS_FREQUENCY_MSRS && > ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE) { > - tsc_register_calibration_routines(hv_get_tsc_khz, hv_get_tsc_khz); > - setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ); > + enum tsc_properties tsc_properties = TSC_FREQUENCY_KNOWN; > + > + if (ms_hyperv.features & HV_ACCESS_TSC_INVARIANT) > + tsc_properties = TSC_FREQ_KNOWN_AND_RELIABLE; > + > + tsc_register_calibration_routines(hv_get_tsc_khz, hv_get_tsc_khz, > + tsc_properties); > } For the Hyper-V guest code, Reviewed-by: Michael Kelley <mhklinux@xxxxxxxxxxx> Tested-by: Michael Kelley <mhklinux@xxxxxxxxxxx> > > if (ms_hyperv.priv_high & HV_ISOLATION) { > @@ -525,7 +530,6 @@ static void __init ms_hyperv_init_platform(void) > * is called. > */ > wrmsrl(HV_X64_MSR_TSC_INVARIANT_CONTROL, > HV_EXPOSE_INVARIANT_TSC); > - setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE); > } > > /* > diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c > index 399cf3286a60..a3a71309214c 100644 > --- a/arch/x86/kernel/cpu/vmware.c > +++ b/arch/x86/kernel/cpu/vmware.c > @@ -385,10 +385,10 @@ static void __init vmware_paravirt_ops_setup(void) > */ > static void __init vmware_set_capabilities(void) > { > + /* TSC is non-stop and reliable even if the frequency isn't known. */ > setup_force_cpu_cap(X86_FEATURE_CONSTANT_TSC); > setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE); > - if (vmware_tsc_khz) > - setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ); > + > if (vmware_hypercall_mode == CPUID_VMWARE_FEATURES_ECX_VMCALL) > setup_force_cpu_cap(X86_FEATURE_VMCALL); > else if (vmware_hypercall_mode == CPUID_VMWARE_FEATURES_ECX_VMMCALL) > @@ -417,7 +417,8 @@ static void __init vmware_platform_setup(void) > > vmware_tsc_khz = tsc_khz; > tsc_register_calibration_routines(vmware_get_tsc_khz, > - vmware_get_tsc_khz); > + vmware_get_tsc_khz, > + TSC_FREQ_KNOWN_AND_RELIABLE); > > #ifdef CONFIG_X86_LOCAL_APIC > /* Skip lapic calibration since we know the bus frequency. */ > diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c > index b0a053692161..d73a4d0fb118 100644 > --- a/arch/x86/kernel/jailhouse.c > +++ b/arch/x86/kernel/jailhouse.c > @@ -218,7 +218,8 @@ static void __init jailhouse_init_platform(void) > > machine_ops.emergency_restart = jailhouse_no_restart; > > - tsc_register_calibration_routines(jailhouse_get_tsc, jailhouse_get_tsc); > + tsc_register_calibration_routines(jailhouse_get_tsc, jailhouse_get_tsc, > + TSC_FREQUENCY_KNOWN); > > while (pa_data) { > mapping = early_memremap(pa_data, sizeof(header)); > @@ -256,7 +257,6 @@ static void __init jailhouse_init_platform(void) > pr_debug("Jailhouse: PM-Timer IO Port: %#x\n", pmtmr_ioport); > > precalibrated_tsc_khz = setup_data.v1.tsc_khz; > - setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ); > > pci_probe = 0; > > diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c > index 1dbe12ecb26e..ce676e735ced 100644 > --- a/arch/x86/kernel/kvmclock.c > +++ b/arch/x86/kernel/kvmclock.c > @@ -199,7 +199,6 @@ void kvmclock_cpu_action(enum kvm_guest_cpu_action action) > */ > static unsigned long kvm_get_tsc_khz(void) > { > - setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ); > return pvclock_tsc_khz(this_cpu_pvti()); > } > > @@ -403,7 +402,8 @@ void __init kvmclock_init(void) > > kvm_sched_clock_init(stable); > > - tsc_register_calibration_routines(kvm_get_tsc_khz, kvm_get_tsc_khz); > + tsc_register_calibration_routines(kvm_get_tsc_khz, kvm_get_tsc_khz, > + TSC_FREQUENCY_KNOWN); > > x86_platform.get_wallclock = kvm_get_wallclock; > x86_platform.set_wallclock = kvm_set_wallclock; > diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c > index 5501d76243c8..be58df4fef66 100644 > --- a/arch/x86/kernel/tsc.c > +++ b/arch/x86/kernel/tsc.c > @@ -1301,11 +1301,17 @@ static void __init check_system_tsc_reliable(void) > */ > #if defined(CONFIG_HYPERVISOR_GUEST) || defined(CONFIG_AMD_MEM_ENCRYPT) > void tsc_register_calibration_routines(unsigned long (*calibrate_tsc)(void), > - unsigned long (*calibrate_cpu)(void)) > + unsigned long (*calibrate_cpu)(void), > + enum tsc_properties properties) > { > if (WARN_ON_ONCE(!calibrate_tsc)) > return; > > + if (properties & TSC_FREQUENCY_KNOWN) > + setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ); > + if (properties & TSC_RELIABLE) > + setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE); > + > x86_platform.calibrate_tsc = calibrate_tsc; > if (calibrate_cpu) > x86_platform.calibrate_cpu = calibrate_cpu; > diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c > index 13e5888c4501..4de06ea55397 100644 > --- a/arch/x86/xen/time.c > +++ b/arch/x86/xen/time.c > @@ -40,7 +40,6 @@ static unsigned long xen_tsc_khz(void) > struct pvclock_vcpu_time_info *info = > &HYPERVISOR_shared_info->vcpu_info[0].time; > > - setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ); > return pvclock_tsc_khz(info); > } > > @@ -571,7 +570,8 @@ static void __init xen_init_time_common(void) > */ > paravirt_set_sched_clock(xen_sched_clock, NULL, NULL); > > - tsc_register_calibration_routines(xen_tsc_khz, NULL); > + tsc_register_calibration_routines(xen_tsc_khz, NULL, > + TSC_FREQUENCY_KNOWN); > x86_platform.get_wallclock = xen_get_wallclock; > } > > -- > 2.48.1.711.g2feabab25a-goog >