On Sun, 20 Oct 2019, Andy Lutomirski wrote:
> On Sat, Oct 19, 2019 at 3:01 AM Thomas Gleixner <tglx@xxxxxxxxxxxxx> wrote:
> > __arch_use_vsyscall() is a pointless exercise TBH. The VDSO data should be
> > updated unconditionally so all the trivial interfaces like time() and
> > getres() just work independently of the functions which depend on the
> > underlying clocksource.
> >
> > These functions have a fallback operation already:
> >
> > Let __arch_get_hw_counter() return U64_MAX and the syscall fallback is
> > invoked.
> >
>
> My thought was that __arch_get_hw_counter() could return last-1 to
> indicate failure, which would allow the two checks to be folded into
> one check. Or we could continue to use U64_MAX and rely on the fact
> that (s64)U64_MAX < 0, not worry about the cycle counter overflowing,
> and letting cycles < last catch it.

This is not an overflow catch. It's solely there to deal with the fact
that on X86 you can observe (cycles < last) on multi socket systems under
rare circumstances. No other architecture has that issue AFAIK.

The wraparound of clocksources with a smaller width than 64bit is handled
by:

	delta = (cycles - last) & mask;

which operates on unsigned values for obvious reasons.
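
For illustration, a minimal self-contained user space sketch of that
masked, unsigned delta calculation for a hypothetical 32bit wide
clocksource whose counter wrapped between the last update and the current
read (the values are made up, this is not kernel code):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t mask   = 0xffffffffULL;	/* 32bit clocksource width */
	uint64_t last   = 0xfffffff0ULL;	/* counter at the last update */
	uint64_t cycles = 0x00000010ULL;	/* current read, after the wrap */

	/* Unsigned subtraction plus masking yields the real forward delta */
	uint64_t delta = (cycles - last) & mask;

	/* Prints delta = 0x20: 0x10 ticks up to the wrap plus 0x10 after it */
	printf("delta = 0x%llx\n", (unsigned long long)delta);
	return 0;
}

Without the mask the plain unsigned subtraction would yield
0xffffffff00000020 here; applying the clocksource mask truncates that to
the real delta of 0x20.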
> (And we should change it to return s64 at some point regardless -- all
> the math is signed, so the underlying types should be too IMO.)

See above. delta is guaranteed to be >= 0 and the mult/shift is not
signed either. All the base values which are in the VDSO are unsigned as
well.

The weird typecast there:

	if ((s64)cycles < 0)

could as well be

	if (cycles == U64_MAX)

but the typecasted version creates better code.

I tried to fold the two operations (see patch below) and on all machines
I tested on (various generations of Intel and AMD) the result is slower
than what we have now by a couple of cycles, which is a lot for these
functions (i.e. between 3-5%). I'm sure I tried that before and ended up
with the existing code as the fastest variant.

Why? That's subject to speculation :)

Thanks,

	tglx

8<----------

 arch/x86/include/asm/vdso/gettimeofday.h |   39 ++++++-------------------------
 lib/vdso/gettimeofday.c                  |   23 +++---------------
 2 files changed, 13 insertions(+), 49 deletions(-)

--- a/arch/x86/include/asm/vdso/gettimeofday.h
+++ b/arch/x86/include/asm/vdso/gettimeofday.h
@@ -235,10 +235,14 @@ static u64 vread_hvclock(void)
 }
 #endif
 
-static inline u64 __arch_get_hw_counter(s32 clock_mode)
+static inline u64 __arch_get_hw_counter(s32 clock_mode, u64 last, u64 mask)
 {
+	/*
+	 * Mask operation is not required as all VDSO clocksources are
+	 * 64bit wide.
+	 */
 	if (clock_mode == VCLOCK_TSC)
-		return (u64)rdtsc_ordered();
+		return (u64)rdtsc_ordered() - last;
 	/*
 	 * For any memory-mapped vclock type, we need to make sure that gcc
 	 * doesn't cleverly hoist a load before the mode check. Otherwise we
@@ -248,13 +252,13 @@ static inline u64 __arch_get_hw_counter(
 #ifdef CONFIG_PARAVIRT_CLOCK
 	if (clock_mode == VCLOCK_PVCLOCK) {
 		barrier();
-		return vread_pvclock();
+		return vread_pvclock() - last;
 	}
 #endif
 #ifdef CONFIG_HYPERV_TIMER
 	if (clock_mode == VCLOCK_HVCLOCK) {
 		barrier();
-		return vread_hvclock();
+		return vread_hvclock() - last;
 	}
 #endif
 	return U64_MAX;
@@ -265,33 +269,6 @@ static __always_inline const struct vdso
 	return __vdso_data;
 }
 
-/*
- * x86 specific delta calculation.
- *
- * The regular implementation assumes that clocksource reads are globally
- * monotonic. The TSC can be slightly off across sockets which can cause
- * the regular delta calculation (@cycles - @last) to return a huge time
- * jump.
- *
- * Therefore it needs to be verified that @cycles are greater than
- * @last. If not then use @last, which is the base time of the current
- * conversion period.
- *
- * This variant also removes the masking of the subtraction because the
- * clocksource mask of all VDSO capable clocksources on x86 is U64_MAX
- * which would result in a pointless operation. The compiler cannot
- * optimize it away as the mask comes from the vdso data and is not compile
- * time constant.
- */
-static __always_inline
-u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
-{
-	if (cycles > last)
-		return (cycles - last) * mult;
-	return 0;
-}
-#define vdso_calc_delta vdso_calc_delta
-
 #endif /* !__ASSEMBLY__ */
 #endif /* __ASM_VDSO_GETTIMEOFDAY_H */
--- a/lib/vdso/gettimeofday.c
+++ b/lib/vdso/gettimeofday.c
@@ -26,34 +26,21 @@
 #include <asm/vdso/gettimeofday.h>
 #endif /* ENABLE_COMPAT_VDSO */
 
-#ifndef vdso_calc_delta
-/*
- * Default implementation which works for all sane clocksources. That
- * obviously excludes x86/TSC.
- */
-static __always_inline
-u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
-{
-	return ((cycles - last) & mask) * mult;
-}
-#endif
-
 static int do_hres(const struct vdso_data *vd, clockid_t clk,
 		   struct __kernel_timespec *ts)
 {
 	const struct vdso_timestamp *vdso_ts = &vd->basetime[clk];
-	u64 cycles, last, sec, ns;
+	u64 delta, sec, ns;
 	u32 seq;
 
 	do {
 		seq = vdso_read_begin(vd);
-		cycles = __arch_get_hw_counter(vd->clock_mode);
-		ns = vdso_ts->nsec;
-		last = vd->cycle_last;
-		if (unlikely((s64)cycles < 0))
+		delta = __arch_get_hw_counter(vd->clock_mode, vd->cycle_last,
+					      vd->mask);
+		if (unlikely((s64)delta < 0))
 			return -1;
-		ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult);
+		ns = vdso_ts->nsec + delta * vd->mult;
 		ns >>= vd->shift;
 		sec = vdso_ts->sec;
 	} while (unlikely(vdso_read_retry(vd, seq)));