Improve the performance of time system calls when using the Linux pvclock by reading time info from the fixmap-visible copy of the pvclock data. Originally from Jeremy Fitzhardinge. Signed-off-by: Marcelo Tosatti <mtosatti@xxxxxxxxxx> Index: vsyscall/arch/x86/vdso/vclock_gettime.c =================================================================== --- vsyscall.orig/arch/x86/vdso/vclock_gettime.c +++ vsyscall/arch/x86/vdso/vclock_gettime.c @@ -22,6 +22,7 @@ #include <asm/hpet.h> #include <asm/unistd.h> #include <asm/io.h> +#include <asm/pvclock.h> #define gtod (&VVAR(vsyscall_gtod_data)) @@ -62,6 +63,69 @@ static notrace cycle_t vread_hpet(void) return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + 0xf0); } +#ifdef CONFIG_PARAVIRT_CLOCK_VSYSCALL + +static notrace const struct pvclock_vsyscall_time_info *get_pvti(int cpu) +{ + const aligned_pvti_t *pvti_base; + int idx = cpu / (PAGE_SIZE/PVTI_SIZE); + int offset = cpu % (PAGE_SIZE/PVTI_SIZE); + + BUG_ON(PVCLOCK_FIXMAP_BEGIN + idx > PVCLOCK_FIXMAP_END); + + pvti_base = (aligned_pvti_t *)__fix_to_virt(PVCLOCK_FIXMAP_BEGIN+idx); + + return &pvti_base[offset].info; +} + +static notrace cycle_t vread_pvclock(int *mode) +{ + const struct pvclock_vsyscall_time_info *pvti; + cycle_t ret; + u64 last; + u32 version; + u32 migrate_count; + u8 flags; + unsigned cpu, cpu1; + + + /* + * When looping to get a consistent (time-info, tsc) pair, we + * also need to deal with the possibility we can switch vcpus, + * so make sure we always re-fetch time-info for the current vcpu. + */ + do { + cpu = __getcpu() & 0xfff; + pvti = get_pvti(cpu); + + migrate_count = pvti->migrate_count; + + version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags); + + /* + * Test we're still on the cpu as well as the version. + * We could have been migrated just after the first + * vgetcpu but before fetching the version, so we + * wouldn't notice a version change. 
+ */ + cpu1 = __getcpu() & 0xfff; + } while (unlikely(cpu != cpu1 || + (pvti->pvti.version & 1) || + pvti->pvti.version != version || + pvti->migrate_count != migrate_count)); + + if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT))) + *mode = VCLOCK_NONE; + + last = VVAR(vsyscall_gtod_data).clock.cycle_last; + + if (likely(ret >= last)) + return ret; + + return last; +} +#endif + notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) { long ret; @@ -88,6 +152,8 @@ notrace static inline u64 vgetsns(int *m cycles = vread_tsc(); else if (gtod->clock.vclock_mode == VCLOCK_HPET) cycles = vread_hpet(); + else if (gtod->clock.vclock_mode == VCLOCK_PVCLOCK) + cycles = vread_pvclock(mode); else return 0; v = (cycles - gtod->clock.cycle_last) & gtod->clock.mask; Index: vsyscall/arch/x86/include/asm/vsyscall.h =================================================================== --- vsyscall.orig/arch/x86/include/asm/vsyscall.h +++ vsyscall/arch/x86/include/asm/vsyscall.h @@ -33,6 +33,21 @@ extern void map_vsyscall(void); */ extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address); +static inline unsigned int __getcpu(void) +{ + unsigned int p; + + if (VVAR(vgetcpu_mode) == VGETCPU_RDTSCP) { + /* Load per CPU data from RDTSCP */ + native_read_tscp(&p); + } else { + /* Load per CPU data from GDT */ + asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG)); + } + + return p; +} + #endif /* __KERNEL__ */ #endif /* _ASM_X86_VSYSCALL_H */ Index: vsyscall/arch/x86/vdso/vgetcpu.c =================================================================== --- vsyscall.orig/arch/x86/vdso/vgetcpu.c +++ vsyscall/arch/x86/vdso/vgetcpu.c @@ -17,13 +17,8 @@ __vdso_getcpu(unsigned *cpu, unsigned *n { unsigned int p; - if (VVAR(vgetcpu_mode) == VGETCPU_RDTSCP) { - /* Load per CPU data from RDTSCP */ - native_read_tscp(&p); - } else { - /* Load per CPU data from GDT */ - asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG)); - } + p = __getcpu(); + if (cpu) *cpu = p & 0xfff; if 
(node) -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html