The test checks the relative precision of the reference TSC page and the time reference counter. Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx> --- Andrey, the test has a relative error of approximately 3 parts per million on my machine. In other words it drifts by 3 microseconds every second, which I don't think is acceptable. The problem is that virtual_tsc_khz is 2593993 while the actual frequency is more like 2594001 kHz. But I have a problem in general with using tsc_khz to compute the hyperv reference clock scale. The maximum possible accuracy on a 2.5 GHz machine is around 200 ppb, corresponding to a drift of about 16 milliseconds per day. Perhaps we could use Linux (and kvmclock's) timekeeping parameters to derive the tsc_scale and tsc_offset? config/config-x86-common.mak | 2 + config/config-x86_64.mak | 1 + x86/hyperv.h | 9 ++ x86/hyperv_clock.c | 193 +++++++++++++++++++++++++++++++++++++++++++ x86/unittests.cfg | 5 ++ 5 files changed, 210 insertions(+) create mode 100644 x86/hyperv_clock.c diff --git a/config/config-x86-common.mak b/config/config-x86-common.mak index 72b95e3..621413b 100644 --- a/config/config-x86-common.mak +++ b/config/config-x86-common.mak @@ -119,6 +119,8 @@ $(TEST_DIR)/hyperv_synic.elf: $(cstart.o) $(TEST_DIR)/hyperv.o \ $(TEST_DIR)/hyperv_stimer.elf: $(cstart.o) $(TEST_DIR)/hyperv.o \ $(TEST_DIR)/hyperv_stimer.o +$(TEST_DIR)/hyperv_clock.elf: $(cstart.o) $(TEST_DIR)/hyperv.o + arch_clean: $(RM) $(TEST_DIR)/*.o $(TEST_DIR)/*.flat $(TEST_DIR)/*.elf \ $(TEST_DIR)/.*.d lib/x86/.*.d diff --git a/config/config-x86_64.mak b/config/config-x86_64.mak index 1764701..634d09d 100644 --- a/config/config-x86_64.mak +++ b/config/config-x86_64.mak @@ -12,5 +12,6 @@ tests = $(TEST_DIR)/access.flat $(TEST_DIR)/apic.flat \ tests += $(TEST_DIR)/svm.flat tests += $(TEST_DIR)/vmx.flat tests += $(TEST_DIR)/tscdeadline_latency.flat +tests += $(TEST_DIR)/hyperv_clock.flat include config/config-x86-common.mak diff --git a/x86/hyperv.h b/x86/hyperv.h 
index faf931b..974df56 100644 --- a/x86/hyperv.h +++ b/x86/hyperv.h @@ -12,6 +12,7 @@ #define HV_X64_MSR_SYNTIMER_AVAILABLE (1 << 3) #define HV_X64_MSR_TIME_REF_COUNT 0x40000020 +#define HV_X64_MSR_REFERENCE_TSC 0x40000021 /* Define synthetic interrupt controller model specific registers. */ #define HV_X64_MSR_SCONTROL 0x40000080 @@ -180,4 +181,12 @@ void synic_sint_create(int vcpu, int sint, int vec, bool auto_eoi); void synic_sint_set(int vcpu, int sint); void synic_sint_destroy(int vcpu, int sint); +struct hv_reference_tsc_page { + uint32_t tsc_sequence; + uint32_t res1; + uint64_t tsc_scale; + int64_t tsc_offset; +}; + + #endif diff --git a/x86/hyperv_clock.c b/x86/hyperv_clock.c new file mode 100644 index 0000000..680ba7c --- /dev/null +++ b/x86/hyperv_clock.c @@ -0,0 +1,193 @@ +#include "libcflat.h" +#include "smp.h" +#include "atomic.h" +#include "processor.h" +#include "hyperv.h" +#include "vm.h" + +#define MAX_CPU 4 +#define TICKS_PER_SEC (1000000000 / 100) + +struct hv_reference_tsc_page *hv_clock; + +/* + * Scale a 64-bit delta by multiplying by a 64-bit fraction, + * yielding the high 64 bits of the 128-bit product. + */ +static inline u64 scale_delta(u64 delta, u64 mul_frac) +{ + u64 product, unused; + + __asm__ ( + "mul %3" + : "=d" (product), "=a" (unused) : "1" (delta), "rm" ((u64)mul_frac) ); + + return product; +} + +static u64 hvclock_tsc_to_ticks(struct hv_reference_tsc_page *shadow, uint64_t tsc) +{ + u64 delta = tsc; + return scale_delta(delta, shadow->tsc_scale) + shadow->tsc_offset; +} + +/* + * Reads a consistent set of time-base values from the hypervisor, + * into a shadow data area. 
+ */ +static void hvclock_get_time_values(struct hv_reference_tsc_page *shadow, + struct hv_reference_tsc_page *page) +{ + int seq; + do { + seq = page->tsc_sequence; + rmb(); /* fetch version before data */ + *shadow = *page; + rmb(); /* test version after fetching data */ + } while (shadow->tsc_sequence != seq); +} + +uint64_t hv_clock_read(void) +{ + struct hv_reference_tsc_page shadow; + + hvclock_get_time_values(&shadow, hv_clock); + return hvclock_tsc_to_ticks(&shadow, rdtsc()); +} + +atomic_t cpus_left; +bool ok[MAX_CPU]; +uint64_t loops[MAX_CPU]; + +static void hv_clock_test(void *data) +{ + int i = smp_id(); + uint64_t t = rdmsr(HV_X64_MSR_TIME_REF_COUNT); + uint64_t end = t + 3 * TICKS_PER_SEC; + + ok[i] = true; + do { + uint64_t now = hv_clock_read(); + if (now < t) { + printf("warp on CPU %d!\n", smp_id()); + ok[i] = false; + break; + } + t = now; + } while(t < end); + + barrier(); + if (t >= end) { + int64_t ref = rdmsr(HV_X64_MSR_TIME_REF_COUNT); + if (i == 0) + printf("Time reference MSR drift: %lld\n\n", ref - end); + ok[i] &= (ref - end) > -5 && (ref - end) < 5; + } + + atomic_dec(&cpus_left); +} + +static void check_test(int ncpus) +{ + int i; + bool pass; + + atomic_set(&cpus_left, ncpus); + for (i = ncpus - 1; i >= 0; i--) + on_cpu_async(i, hv_clock_test, NULL); + + /* Wait for the end of other vcpu */ + while(atomic_read(&cpus_left)) + ; + + pass = true; + for (i = ncpus - 1; i >= 0; i--) + pass &= ok[i]; + + report("TSC reference precision test", pass); +} + +static void hv_perf_test(void *data) +{ + uint64_t t = hv_clock_read(); + uint64_t end = t + 1000000000 / 100; + uint64_t local_loops = 0; + + do { + t = hv_clock_read(); + local_loops++; + } while(t < end); + + loops[smp_id()] = local_loops; + atomic_dec(&cpus_left); +} + +static void perf_test(int ncpus) +{ + int i; + uint64_t total_loops; + + atomic_set(&cpus_left, ncpus); + for (i = ncpus - 1; i >= 0; i--) + on_cpu_async(i, hv_perf_test, NULL); + + /* Wait for the end of other vcpu */ 
+ while(atomic_read(&cpus_left)) + ; + + total_loops = 0; + for (i = ncpus - 1; i >= 0; i--) + total_loops += loops[i]; + printf("iterations/sec: %lld\n", total_loops / ncpus); +} + +int main(int ac, char **av) +{ + int nerr = 0; + int ncpus; + struct hv_reference_tsc_page shadow; + uint64_t tsc1, t1, tsc2, t2; + uint64_t ref1, ref2; + + setup_vm(); + smp_init(); + + hv_clock = alloc_page(); + wrmsr(HV_X64_MSR_REFERENCE_TSC, (u64)(uintptr_t)hv_clock | 1); + report("MSR value after enabling", + rdmsr(HV_X64_MSR_REFERENCE_TSC) == ((u64)(uintptr_t)hv_clock | 1)); + + hvclock_get_time_values(&shadow, hv_clock); + if (shadow.tsc_sequence == 0 || shadow.tsc_sequence == 0xFFFFFFFF) { + printf("Reference TSC page not available\n"); + exit(1); + } + + printf("scale: %llx offset: %lld\n", shadow.tsc_scale, shadow.tsc_offset); + ref1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT); + tsc1 = rdtsc(); + t1 = hvclock_tsc_to_ticks(&shadow, tsc1); + printf("refcnt %lld, TSC %llx, TSC reference %lld\n", + ref1, tsc1, t1); + + do + ref2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT); + while (ref2 < ref1 + 2 * TICKS_PER_SEC); + + tsc2 = rdtsc(); + t2 = hvclock_tsc_to_ticks(&shadow, tsc2); + printf("refcnt %lld (delta %lld), TSC %llx, TSC reference %lld (delta %lld)\n", + ref2, ref2 - ref1, tsc2, t2, t2 - t1); + + ncpus = cpu_count(); + if (ncpus > MAX_CPU) + ncpus = MAX_CPU; + + check_test(ncpus); + perf_test(ncpus); + + wrmsr(HV_X64_MSR_REFERENCE_TSC, 0LL); + report("MSR value after disabling", rdmsr(HV_X64_MSR_REFERENCE_TSC) == 0); + + return nerr > 0 ? 
1 : 0; +} diff --git a/x86/unittests.cfg b/x86/unittests.cfg index 99eff26..e981c00 100644 --- a/x86/unittests.cfg +++ b/x86/unittests.cfg @@ -188,3 +188,8 @@ extra_params = -cpu kvm64,hv_synic -device hyperv-testdev file = hyperv_stimer.flat smp = 2 extra_params = -cpu kvm64,hv_time,hv_synic,hv_stimer -device hyperv-testdev + +[hyperv_clock] +file = hyperv_clock.flat +smp = 2 +extra_params = -cpu kvm64,hv_time -- 1.8.3.1 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html