Re: [PATCH 1/3] x86/tsc: implement tsc=directsync for systems without IA32_TSC_ADJUST

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi,

Kind reminder.

On 8/8/22 4:39 PM, Muhammad Usama Anjum wrote:
> From: Steven Noonan <steven@xxxxxxxxxxxxxx>
> 
> AMD processors don't implement any mechanism like Intel's
> IA32_TSC_ADJUST MSR to sync the TSC. Instead of just relying on the
> BIOS, TSC can be synced by calculating the difference and directly
> writing it to the TSC MSR.
> 
> Add directsync flag to turn on the TSC sync when IA32_TSC_MSR isn't
> available. Attempt 1000 times or for 30 seconds before giving up.
> 
> Signed-off-by: Steven Noonan <steven@xxxxxxxxxxxxxx>
> Signed-off-by: Muhammad Usama Anjum <usama.anjum@xxxxxxxxxxxxx>
> ---
>  .../admin-guide/kernel-parameters.txt         |  4 +-
>  arch/x86/include/asm/tsc.h                    |  1 +
>  arch/x86/kernel/tsc.c                         |  3 ++
>  arch/x86/kernel/tsc_sync.c                    | 46 +++++++++++++++----
>  4 files changed, 43 insertions(+), 11 deletions(-)
> 
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index db5de5f0b9d3..f0e6ea580e68 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -6271,7 +6271,7 @@
>  			If not specified, "default" is used. In this case,
>  			the RNG's choice is left to each individual trust source.
>  
> -	tsc=		Disable clocksource stability checks for TSC.
> +	tsc=		Disable clocksource stability checks for TSC or sync the TSC.
>  			Format: <string>
>  			[x86] reliable: mark tsc clocksource as reliable, this
>  			disables clocksource verification at runtime, as well
> @@ -6289,6 +6289,8 @@
>  			in situations with strict latency requirements (where
>  			interruptions from clocksource watchdog are not
>  			acceptable).
> +			[x86] directsync: attempt to sync the tsc via direct
> +			writes if MSR_IA32_TSC_ADJUST isn't available
>  
>  	tsc_early_khz=  [X86] Skip early TSC calibration and use the given
>  			value instead. Useful when the early TSC frequency discovery
> diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
> index fbdc3d951494..dc70909119e8 100644
> --- a/arch/x86/include/asm/tsc.h
> +++ b/arch/x86/include/asm/tsc.h
> @@ -42,6 +42,7 @@ extern unsigned long native_calibrate_tsc(void);
>  extern unsigned long long native_sched_clock_from_tsc(u64 tsc);
>  
>  extern int tsc_clocksource_reliable;
> +extern int tsc_allow_direct_sync;
>  #ifdef CONFIG_X86_TSC
>  extern bool tsc_async_resets;
>  #else
> diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
> index cafacb2e58cc..6345af65a549 100644
> --- a/arch/x86/kernel/tsc.c
> +++ b/arch/x86/kernel/tsc.c
> @@ -47,6 +47,7 @@ static unsigned int __initdata tsc_early_khz;
>  static DEFINE_STATIC_KEY_FALSE(__use_tsc);
>  
>  int tsc_clocksource_reliable;
> +int tsc_allow_direct_sync;
>  
>  static u32 art_to_tsc_numerator;
>  static u32 art_to_tsc_denominator;
> @@ -303,6 +304,8 @@ static int __init tsc_setup(char *str)
>  		mark_tsc_unstable("boot parameter");
>  	if (!strcmp(str, "nowatchdog"))
>  		no_tsc_watchdog = 1;
> +	if (!strcmp(str, "directsync"))
> +		tsc_allow_direct_sync = 1;
>  	return 1;
>  }
>  
> diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
> index 9452dc9664b5..2a855991f982 100644
> --- a/arch/x86/kernel/tsc_sync.c
> +++ b/arch/x86/kernel/tsc_sync.c
> @@ -340,6 +340,8 @@ static cycles_t check_tsc_warp(unsigned int timeout)
>   */
>  static inline unsigned int loop_timeout(int cpu)
>  {
> +	if (!boot_cpu_has(X86_FEATURE_TSC_ADJUST))
> +		return 30;
>  	return (cpumask_weight(topology_core_cpumask(cpu)) > 1) ? 2 : 20;
>  }
>  
> @@ -360,13 +362,16 @@ void check_tsc_sync_source(int cpu)
>  
>  	/*
>  	 * Set the maximum number of test runs to
> -	 *  1 if the CPU does not provide the TSC_ADJUST MSR
> -	 *  3 if the MSR is available, so the target can try to adjust
> +	 *  3 if TSC_ADJUST MSR is available, so the target can try to adjust
> +	 *  1000 if TSC MSR can be written to compensate
> +	 *  1 if MSRs cannot be written
>  	 */
> -	if (!boot_cpu_has(X86_FEATURE_TSC_ADJUST))
> -		atomic_set(&test_runs, 1);
> -	else
> +	if (boot_cpu_has(X86_FEATURE_TSC_ADJUST))
>  		atomic_set(&test_runs, 3);
> +	else if (tsc_allow_direct_sync)
> +		atomic_set(&test_runs, 1000);
> +	else
> +		atomic_set(&test_runs, 1);
>  retry:
>  	/*
>  	 * Wait for the target to start or to skip the test:
> @@ -434,6 +439,21 @@ void check_tsc_sync_source(int cpu)
>  		goto retry;
>  }
>  
> +static inline cycles_t write_tsc_adjustment(cycles_t adjustment)
> +{
> +	cycles_t adjval, nextval;
> +
> +	rdmsrl(MSR_IA32_TSC, adjval);
> +	adjval += adjustment;
> +	wrmsrl(MSR_IA32_TSC, adjval);
> +	rdmsrl(MSR_IA32_TSC, nextval);
> +
> +	/*
> +	 * Estimated clock cycle overhead for wrmsr + rdmsr
> +	 */
> +	return nextval - adjval;
> +}
> +
>  /*
>   * Freshly booted CPUs call into this:
>   */
> @@ -441,7 +461,7 @@ void check_tsc_sync_target(void)
>  {
>  	struct tsc_adjust *cur = this_cpu_ptr(&tsc_adjust);
>  	unsigned int cpu = smp_processor_id();
> -	cycles_t cur_max_warp, gbl_max_warp;
> +	cycles_t cur_max_warp, gbl_max_warp, est_overhead = 0;
>  	int cpus = 2;
>  
>  	/* Also aborts if there is no TSC. */
> @@ -521,12 +541,18 @@ void check_tsc_sync_target(void)
>  	 * value is used. In the worst case the adjustment needs to go
>  	 * through a 3rd run for fine tuning.
>  	 */
> -	cur->adjusted += cur_max_warp;
> +	if (boot_cpu_has(X86_FEATURE_TSC_ADJUST)) {
> +		cur->adjusted += cur_max_warp;
>  
> -	pr_warn("TSC ADJUST compensate: CPU%u observed %lld warp. Adjust: %lld\n",
> -		cpu, cur_max_warp, cur->adjusted);
> +		pr_warn("TSC ADJUST compensate: CPU%u observed %lld warp. Adjust: %lld\n",
> +			cpu, cur_max_warp, cur->adjusted);
>  
> -	wrmsrl(MSR_IA32_TSC_ADJUST, cur->adjusted);
> +		wrmsrl(MSR_IA32_TSC_ADJUST, cur->adjusted);
> +	} else {
> +		pr_debug("TSC direct sync: CPU%u observed %lld warp. Overhead: %lld\n",
> +			cpu, cur_max_warp, est_overhead);
> +		est_overhead = write_tsc_adjustment(cur_max_warp + est_overhead);
> +	}
>  	goto retry;
>  
>  }

-- 
Muhammad Usama Anjum



[Index of Archives]     [Kernel Newbies]     [Security]     [Netfilter]     [Bugtraq]     [Linux FS]     [Yosemite Forum]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Video 4 Linux]     [Device Mapper]     [Linux Resources]

  Powered by Linux