On Wed, Feb 26, 2025 at 06:18:22PM -0800, Sean Christopherson wrote: > When running as a TDX guest, explicitly override the TSC frequency > calibration routine with CPUID-based calibration instead of potentially > relying on a hypervisor-controlled PV routine. For TDX guests, CPUID.0x15 > is always emulated by the TDX-Module, i.e. the information from CPUID is > more trustworthy than the information provided by the hypervisor. > > To maintain backwards compatibility with TDX guest kernels that use native > calibration, and because it's the least awful option, retain > native_calibrate_tsc()'s stuffing of the local APIC bus period using the > core crystal frequency. While it's entirely possible for the hypervisor > to emulate the APIC timer at a different frequency than the core crystal > frequency, the commonly accepted interpretation of Intel's SDM is that APIC > timer runs at the core crystal frequency when that latter is enumerated via > CPUID: > > The APIC timer frequency will be the processor’s bus clock or core > crystal clock frequency (when TSC/core crystal clock ratio is enumerated > in CPUID leaf 0x15). > > If the hypervisor is malicious and deliberately runs the APIC timer at the > wrong frequency, nothing would stop the hypervisor from modifying the > frequency at any time, i.e. attempting to manually calibrate the frequency > out of paranoia would be futile. > > Deliberately leave the CPU frequency calibration routine as is, since the > TDX-Module doesn't provide any guarantees with respect to CPUID.0x16. > > Opportunistically add a comment explaining that CoCo TSC initialization > needs to come after hypervisor specific initialization. > > Cc: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx> > Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx> > --- > arch/x86/coco/tdx/tdx.c | 30 +++++++++++++++++++++++++++--- > arch/x86/include/asm/tdx.h | 2 ++ > arch/x86/kernel/tsc.c | 8 ++++++++ > 3 files changed, 37 insertions(+), 3 deletions(-) > > diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c > index 32809a06dab4..42cdaa98dc5e 100644 > --- a/arch/x86/coco/tdx/tdx.c > +++ b/arch/x86/coco/tdx/tdx.c > @@ -8,6 +8,7 @@ > #include <linux/export.h> > #include <linux/io.h> > #include <linux/kexec.h> > +#include <asm/apic.h> > #include <asm/coco.h> > #include <asm/tdx.h> > #include <asm/vmx.h> > @@ -1063,9 +1064,6 @@ void __init tdx_early_init(void) > > setup_force_cpu_cap(X86_FEATURE_TDX_GUEST); > > - /* TSC is the only reliable clock in TDX guest */ > - setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE); > - > cc_vendor = CC_VENDOR_INTEL; > > /* Configure the TD */ > @@ -1122,3 +1120,29 @@ void __init tdx_early_init(void) > > tdx_announce(); > } > + > +static unsigned long tdx_get_tsc_khz(void) > +{ > + struct cpuid_tsc_info info; > + > + if (WARN_ON_ONCE(cpuid_get_tsc_freq(&info))) > + return 0; > + > + lapic_timer_period = info.crystal_khz * 1000 / HZ; > + > + return info.tsc_khz; > +} > + > +void __init tdx_tsc_init(void) > +{ > + /* TSC is the only reliable clock in TDX guest */ > + setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE); > + setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ); > + > + /* > + * Override the PV calibration routines (if set) with more trustworthy > + * CPUID-based calibration. The TDX module emulates CPUID, whereas any > + * PV information is provided by the hypervisor. > + */ > + tsc_register_calibration_routines(tdx_get_tsc_khz, NULL); > +} > diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h > index b4b16dafd55e..621fbdd101e2 100644 > --- a/arch/x86/include/asm/tdx.h > +++ b/arch/x86/include/asm/tdx.h > @@ -53,6 +53,7 @@ struct ve_info { > #ifdef CONFIG_INTEL_TDX_GUEST > > void __init tdx_early_init(void); > +void __init tdx_tsc_init(void); > > void tdx_get_ve_info(struct ve_info *ve); > > @@ -72,6 +73,7 @@ void __init tdx_dump_td_ctls(u64 td_ctls); > #else > > static inline void tdx_early_init(void) { }; > +static inline void tdx_tsc_init(void) { } > static inline void tdx_safe_halt(void) { }; > > static inline bool tdx_early_handle_ve(struct pt_regs *regs) { return false; } > diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c > index 6a011cd1ff94..472d6c71d3a5 100644 > --- a/arch/x86/kernel/tsc.c > +++ b/arch/x86/kernel/tsc.c > @@ -32,6 +32,7 @@ > #include <asm/topology.h> > #include <asm/uv/uv.h> > #include <asm/sev.h> > +#include <asm/tdx.h> > > unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */ > EXPORT_SYMBOL(cpu_khz); > @@ -1563,8 +1564,15 @@ void __init tsc_early_init(void) > if (is_early_uv_system()) > return; > > + /* > + * Do CoCo specific "secure" TSC initialization *after* hypervisor > + * platform initialization so that the secure variant can override the > + * hypervisor's PV calibration routine with a more trusted method. > + */ > if (cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC)) > snp_secure_tsc_init(); > + else if (boot_cpu_has(X86_FEATURE_TDX_GUEST)) > + tdx_tsc_init(); Maybe a x86_platform.guest callback for this? -- Kiryl Shutsemau / Kirill A. Shutemov