Implement smp_vcond_load_relaxed() atop __delay_until_ul() on arm64,
to reduce the number of busy loops while waiting for a value condition.

This implementation only supports unsigned long words. It can be
extended via the enclosed case structure in barrier.h as needed; any
other operand size is rejected at build time.

Signed-off-by: Haris Okanovic <harisokn@xxxxxxxxxx>
---
 arch/arm64/include/asm/barrier.h | 33 +++++++++++++++++++++++++++++++++
 arch/arm64/lib/delay.c           | 37 +++++++++++++++++++++++++++++++++++++
 2 files changed, 70 insertions(+)

diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 1ca947d5c939..188327e3ce72 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -203,6 +203,39 @@ do {									\
 	(typeof(*ptr))VAL;						\
 })
 
+/*
+ * Out-of-line helper backing smp_vcond_load_relaxed() for unsigned long
+ * words; defined in arch/arm64/lib/delay.c.
+ */
+extern unsigned long __smp_vcond_load_relaxed_ul(
+	u64 nsecs, unsigned long *addr, unsigned long mask, unsigned long val);
+
+/*
+ * smp_vcond_load_relaxed() - bounded wait for a masked value at @addr.
+ *
+ * Hands @nsecs, @addr, @mask and @val to the helper matching
+ * sizeof(*@addr) and evaluates to the helper's return value. Only
+ * unsigned long sized words are handled; any other size fails the build
+ * rather than evaluating to an uninitialized __cur.
+ */
+#define smp_vcond_load_relaxed(nsecs, addr, mask, val) ({		\
+	u64 __nsecs = (nsecs);						\
+	typeof(addr) __addr = (addr);					\
+	typeof(*__addr) __mask = (mask);				\
+	typeof(*__addr) __val = (val);					\
+	typeof(*__addr) __cur;						\
+	switch (sizeof(*__addr)) {					\
+	case sizeof(unsigned long):					\
+		__cur = __smp_vcond_load_relaxed_ul(			\
+			__nsecs, __addr, __mask, __val);		\
+		break;							\
+	default:							\
+		compiletime_assert(0,					\
+			"smp_vcond_load_relaxed: unsupported size");	\
+	}								\
+	(__cur);							\
+})
+
 #define smp_cond_load_acquire(ptr, cond_expr)				\
 ({									\
 	typeof(ptr) __PTR = (ptr);					\
diff --git a/arch/arm64/lib/delay.c b/arch/arm64/lib/delay.c
index a7c3040af316..a61a13b04439 100644
--- a/arch/arm64/lib/delay.c
+++ b/arch/arm64/lib/delay.c
@@ -12,6 +12,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/timex.h>
+#include <linux/sched/clock.h>
 
 #include <clocksource/arm_arch_timer.h>
 #include <asm/readex.h>
@@ -97,3 +98,39 @@ void __ndelay(unsigned long nsecs)
 	__delay(NSECS_TO_CYCLES(nsecs));
 }
 EXPORT_SYMBOL(__ndelay);
+
+/*
+ * Wait until (cur & mask) == val, where cur is the value returned by
+ * __delay_until_ul(), or until nsecs have elapsed on the local clock.
+ * Returns the last such value; callers tell a match from a timeout by
+ * re-checking it against mask/val.
+ */
+unsigned long __smp_vcond_load_relaxed_ul(
+	u64 nsecs, unsigned long *addr, unsigned long mask, unsigned long val)
+{
+	const u64 start = local_clock_noinstr();
+	u64 remaining = nsecs;
+	unsigned long cur;
+
+	for (;;) {
+		u64 elapsed;
+
+		cur = __delay_until_ul(NSECS_TO_CYCLES(remaining),
+				       addr, mask, val);
+		if ((cur & mask) == val)
+			break;
+
+		elapsed = local_clock_noinstr() - start;
+		if (elapsed >= nsecs)
+			break;
+
+		/*
+		 * Retry with only the time left, so an early return from
+		 * __delay_until_ul() cannot stretch the wait past nsecs.
+		 */
+		remaining = nsecs - elapsed;
+	}
+
+	return cur;
+}
+EXPORT_SYMBOL(__smp_vcond_load_relaxed_ul);
-- 
2.34.1