Re: [PATCH] clocksource/drivers/arm_arch_timer: Update sched_clock when non-boot CPUs need counter workaround

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Marc,

On Fri, Jan 13, 2023 at 11:16:48AM +0000, Marc Zyngier wrote:
> When booting on a CPU that has a countertum on the counter read,
> we use the arch_counter_get_cnt{v,p}ct_stable() backend which
> applies the workaround.
> 
> However, we don't do the same thing when an affected CPU is
> a secondary CPU, and we're stuck with the standard sched_clock()
> backend that knows nothing about the workaround.
> 
> Fix it by always indirecting sched_clock(), making arch_timer_read_counter
> a function instead of a function pointer. In turn, we update the
> pointer (now private to the driver code) when detecting a new
> workaround.

Unfortunately, I don't think this is sufficient.

I'm pretty sure secondary CPUs might call sched_clock() before getting to
arch_counter_register(), so there'll be a window where this could go wrong.

If we consider late onlining on a preemptible kernel we'll also have a race at
runtime:

| sched_clock() {
| 	arch_timer_read_counter() {
| 		// reads __arch_timer_read_counter == arch_counter_get_cntvct;
| 		
| 		arch_counter_get_cntvct() {
| 			
| 			< PREEMPTED >
| 			< CPU requiring workaround onlined >
| 			< RESCHEDULED on affected CPU >
| 
| 			MRS xN, CNTVCT_EL0	// reads junk here
| 		}
| 	}
| }

I think we need to reconsider the approach.

Since the accessor is out-of-line anyway, we could use a static key *within*
the accessor to handle that, e.g.

| u64 arch_timer_get_cntvct(void)
| {
| 	u64 val = read_sysreg(cntvct_el0);
| 	if (!static_branch_unlikely(use_timer_workaround))
| 		return val;
| 
| 	// do stablisation workaround here
| 	
| 	return val;
| }

... and we'd need to transiently enable the workaround when beinging a CPU
online in case it needs the workaround. We could use the static key inc / dec
helpers from the CPU invoking the hotplug to manage that.

With that, we should never perform the first read on an affected core without
also deciding to perform the workaround.

Does that make sound plausible?

Thanks,
Mark.

> Cc: Mark Rutland <mark.rutland@xxxxxxx>
> Cc: Will Deacon <will@xxxxxxxxxx>
> Cc: Daniel Lezcano <daniel.lezcano@xxxxxxxxxx>
> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
> Cc: stable@xxxxxxxxxxxxxxx
> Reported-by: Yogesh Lal <quic_ylal@xxxxxxxxxxx>
> Signed-off-by: Marc Zyngier <maz@xxxxxxxxxx>
> Fixes: 0ea415390cd3 ("clocksource/arm_arch_timer: Use arch_timer_read_counter to access stable counters")
> Link: https://lore.kernel.org/r/ca4679a0-7f29-65f4-54b9-c575248192f1@xxxxxxxxxxx
> ---
>  drivers/clocksource/arm_arch_timer.c | 56 +++++++++++++++++-----------
>  include/clocksource/arm_arch_timer.h |  2 +-
>  2 files changed, 36 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
> index e09d4427f604..5272db86bef5 100644
> --- a/drivers/clocksource/arm_arch_timer.c
> +++ b/drivers/clocksource/arm_arch_timer.c
> @@ -217,7 +217,12 @@ static notrace u64 arch_counter_get_cntvct(void)
>   * to exist on arm64. arm doesn't use this before DT is probed so even
>   * if we don't have the cp15 accessors we won't have a problem.
>   */
> -u64 (*arch_timer_read_counter)(void) __ro_after_init = arch_counter_get_cntvct;
> +static u64 (*__arch_timer_read_counter)(void) __ro_after_init = arch_counter_get_cntvct;
> +
> +u64 arch_timer_read_counter(void)
> +{
> +	return __arch_timer_read_counter();

Since the function pointer can be modified concurrently, we'll need to use 
	return READ_ONCE(__arch_timer_read_counter)();

SInce this could change dynamically, we
> +}
>  EXPORT_SYMBOL_GPL(arch_timer_read_counter);
>  
>  static u64 arch_counter_read(struct clocksource *cs)
> @@ -230,6 +235,28 @@ static u64 arch_counter_read_cc(const struct cyclecounter *cc)
>  	return arch_timer_read_counter();
>  }
>  
> +static bool arch_timer_counter_has_wa(void);
> +
> +static u64 (*arch_counter_get_read_fn(void))(void)
> +{
> +	u64 (*rd)(void);
> +
> +	if ((IS_ENABLED(CONFIG_ARM64) && !is_hyp_mode_available()) ||
> +	    arch_timer_uses_ppi == ARCH_TIMER_VIRT_PPI) {
> +		if (arch_timer_counter_has_wa())
> +			rd = arch_counter_get_cntvct_stable;
> +		else
> +			rd = arch_counter_get_cntvct;
> +	} else {
> +		if (arch_timer_counter_has_wa())
> +			rd = arch_counter_get_cntpct_stable;
> +		else
> +			rd = arch_counter_get_cntpct;
> +	}
> +
> +	return rd;
> +}
> +
>  static struct clocksource clocksource_counter = {
>  	.name	= "arch_sys_counter",
>  	.id	= CSID_ARM_ARCH_COUNTER,
> @@ -571,8 +598,10 @@ void arch_timer_enable_workaround(const struct arch_timer_erratum_workaround *wa
>  			per_cpu(timer_unstable_counter_workaround, i) = wa;
>  	}
>  
> -	if (wa->read_cntvct_el0 || wa->read_cntpct_el0)
> -		atomic_set(&timer_unstable_counter_workaround_in_use, 1);
> +	if (wa->read_cntvct_el0 || wa->read_cntpct_el0) {
> +		__arch_timer_read_counter = arch_counter_get_read_fn();
> +		atomic_set_release(&timer_unstable_counter_workaround_in_use, 1);
> +	}
>  
>  	/*
>  	 * Don't use the vdso fastpath if errata require using the
> @@ -641,7 +670,7 @@ static bool arch_timer_counter_has_wa(void)
>  #else
>  #define arch_timer_check_ool_workaround(t,a)		do { } while(0)
>  #define arch_timer_this_cpu_has_cntvct_wa()		({false;})
> -#define arch_timer_counter_has_wa()			({false;})
> +static bool arch_timer_counter_has_wa(void)		{ return false; }
>  #endif /* CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND */
>  
>  static __always_inline irqreturn_t timer_handler(const int access,
> @@ -1079,25 +1108,10 @@ static void __init arch_counter_register(unsigned type)
>  
>  	/* Register the CP15 based counter if we have one */
>  	if (type & ARCH_TIMER_TYPE_CP15) {
> -		u64 (*rd)(void);
> -
> -		if ((IS_ENABLED(CONFIG_ARM64) && !is_hyp_mode_available()) ||
> -		    arch_timer_uses_ppi == ARCH_TIMER_VIRT_PPI) {
> -			if (arch_timer_counter_has_wa())
> -				rd = arch_counter_get_cntvct_stable;
> -			else
> -				rd = arch_counter_get_cntvct;
> -		} else {
> -			if (arch_timer_counter_has_wa())
> -				rd = arch_counter_get_cntpct_stable;
> -			else
> -				rd = arch_counter_get_cntpct;
> -		}
> -
> -		arch_timer_read_counter = rd;
> +		__arch_timer_read_counter = arch_counter_get_read_fn();
>  		clocksource_counter.vdso_clock_mode = vdso_default;
>  	} else {
> -		arch_timer_read_counter = arch_counter_get_cntvct_mem;
> +		__arch_timer_read_counter = arch_counter_get_cntvct_mem;
>  	}
>  
>  	width = arch_counter_get_width();
> diff --git a/include/clocksource/arm_arch_timer.h b/include/clocksource/arm_arch_timer.h
> index 057c8964aefb..ec331b65ba23 100644
> --- a/include/clocksource/arm_arch_timer.h
> +++ b/include/clocksource/arm_arch_timer.h
> @@ -85,7 +85,7 @@ struct arch_timer_mem {
>  #ifdef CONFIG_ARM_ARCH_TIMER
>  
>  extern u32 arch_timer_get_rate(void);
> -extern u64 (*arch_timer_read_counter)(void);
> +extern u64 arch_timer_read_counter(void);
>  extern struct arch_timer_kvm_info *arch_timer_get_kvm_info(void);
>  extern bool arch_timer_evtstrm_available(void);
>  
> -- 
> 2.34.1
> 



[Index of Archives]     [Linux Kernel]     [Kernel Development Newbies]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite Hiking]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux