Re: [PATCH v2 06/38] x86/tdx: Override PV calibration routines with CPUID-based calibration

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Feb 26, 2025 at 06:18:22PM -0800, Sean Christopherson wrote:
> When running as a TDX guest, explicitly override the TSC frequency
> calibration routine with CPUID-based calibration instead of potentially
> relying on a hypervisor-controlled PV routine.  For TDX guests, CPUID.0x15
> is always emulated by the TDX-Module, i.e. the information from CPUID is
> more trustworthy than the information provided by the hypervisor.
> 
> To maintain backwards compatibility with TDX guest kernels that use native
> calibration, and because it's the least awful option, retain
> native_calibrate_tsc()'s stuffing of the local APIC bus period using the
> core crystal frequency.  While it's entirely possible for the hypervisor
> to emulate the APIC timer at a different frequency than the core crystal
> frequency, the commonly accepted interpretation of Intel's SDM is that APIC
> timer runs at the core crystal frequency when that latter is enumerated via
> CPUID:
> 
>   The APIC timer frequency will be the processor’s bus clock or core
>   crystal clock frequency (when TSC/core crystal clock ratio is enumerated
>   in CPUID leaf 0x15).
> 
> If the hypervisor is malicious and deliberately runs the APIC timer at the
> wrong frequency, nothing would stop the hypervisor from modifying the
> frequency at any time, i.e. attempting to manually calibrate the frequency
> out of paranoia would be futile.
> 
> Deliberately leave the CPU frequency calibration routine as is, since the
> TDX-Module doesn't provide any guarantees with respect to CPUID.0x16.
> 
> Opportunistically add a comment explaining that CoCo TSC initialization
> needs to come after hypervisor specific initialization.
> 
> Cc: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx>
> Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
> ---
>  arch/x86/coco/tdx/tdx.c    | 30 +++++++++++++++++++++++++++---
>  arch/x86/include/asm/tdx.h |  2 ++
>  arch/x86/kernel/tsc.c      |  8 ++++++++
>  3 files changed, 37 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
> index 32809a06dab4..42cdaa98dc5e 100644
> --- a/arch/x86/coco/tdx/tdx.c
> +++ b/arch/x86/coco/tdx/tdx.c
> @@ -8,6 +8,7 @@
>  #include <linux/export.h>
>  #include <linux/io.h>
>  #include <linux/kexec.h>
> +#include <asm/apic.h>
>  #include <asm/coco.h>
>  #include <asm/tdx.h>
>  #include <asm/vmx.h>
> @@ -1063,9 +1064,6 @@ void __init tdx_early_init(void)
>  
>  	setup_force_cpu_cap(X86_FEATURE_TDX_GUEST);
>  
> -	/* TSC is the only reliable clock in TDX guest */
> -	setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
> -
>  	cc_vendor = CC_VENDOR_INTEL;
>  
>  	/* Configure the TD */
> @@ -1122,3 +1120,29 @@ void __init tdx_early_init(void)
>  
>  	tdx_announce();
>  }
> +
> +static unsigned long tdx_get_tsc_khz(void)
> +{
> +	struct cpuid_tsc_info info;
> +
> +	if (WARN_ON_ONCE(cpuid_get_tsc_freq(&info)))
> +		return 0;
> +
> +	lapic_timer_period = info.crystal_khz * 1000 / HZ;
> +
> +	return info.tsc_khz;
> +}
> +
> +void __init tdx_tsc_init(void)
> +{
> +	/* TSC is the only reliable clock in TDX guest */
> +	setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
> +	setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
> +
> +	/*
> +	 * Override the PV calibration routines (if set) with more trustworthy
> +	 * CPUID-based calibration.  The TDX module emulates CPUID, whereas any
> +	 * PV information is provided by the hypervisor.
> +	 */
> +	tsc_register_calibration_routines(tdx_get_tsc_khz, NULL);
> +}
> diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
> index b4b16dafd55e..621fbdd101e2 100644
> --- a/arch/x86/include/asm/tdx.h
> +++ b/arch/x86/include/asm/tdx.h
> @@ -53,6 +53,7 @@ struct ve_info {
>  #ifdef CONFIG_INTEL_TDX_GUEST
>  
>  void __init tdx_early_init(void);
> +void __init tdx_tsc_init(void);
>  
>  void tdx_get_ve_info(struct ve_info *ve);
>  
> @@ -72,6 +73,7 @@ void __init tdx_dump_td_ctls(u64 td_ctls);
>  #else
>  
>  static inline void tdx_early_init(void) { };
> +static inline void tdx_tsc_init(void) { }
>  static inline void tdx_safe_halt(void) { };
>  
>  static inline bool tdx_early_handle_ve(struct pt_regs *regs) { return false; }
> diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
> index 6a011cd1ff94..472d6c71d3a5 100644
> --- a/arch/x86/kernel/tsc.c
> +++ b/arch/x86/kernel/tsc.c
> @@ -32,6 +32,7 @@
>  #include <asm/topology.h>
>  #include <asm/uv/uv.h>
>  #include <asm/sev.h>
> +#include <asm/tdx.h>
>  
>  unsigned int __read_mostly cpu_khz;	/* TSC clocks / usec, not used here */
>  EXPORT_SYMBOL(cpu_khz);
> @@ -1563,8 +1564,15 @@ void __init tsc_early_init(void)
>  	if (is_early_uv_system())
>  		return;
>  
> +	/*
> +	 * Do CoCo specific "secure" TSC initialization *after* hypervisor
> +	 * platform initialization so that the secure variant can override the
> +	 * hypervisor's PV calibration routine with a more trusted method.
> +	 */
>  	if (cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC))
>  		snp_secure_tsc_init();
> +	else if (boot_cpu_has(X86_FEATURE_TDX_GUEST))
> +		tdx_tsc_init();

Maybe a x86_platform.guest callback for this?


-- 
  Kiryl Shutsemau / Kirill A. Shutemov




[Index of Archives]     [KVM Development]     [Libvirt Development]     [Libvirt Users]     [CentOS Virtualization]     [Netdev]     [Ethernet Bridging]     [Linux Wireless]     [Kernel Newbies]     [Security]     [Linux for Hams]     [Netfilter]     [Bugtraq]     [Yosemite Forum]     [MIPS Linux]     [ARM Linux]     [Linux RAID]     [Linux Admin]     [Samba]

  Powered by Linux