On 01/28/2016 05:04 PM, Paolo Bonzini wrote:
The test checks the relative precision of the reference TSC page
and the time reference counter.
Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
---
Andrey, the test has a relative error of approximately 3 parts
per million on my machine. In other words it drifts by 3
microseconds every second, which I don't think is acceptable.
The problem is that virtual_tsc_khz is 2593993 while the actual
frequency is more like 2594001 kHz.
But I have a problem in general with using tsc_khz to compute
the hyperv reference clock scale. The maximum possible accuracy
on a 2.5 GHz machine is around 200 ppb, corresponding to a drift
of about 16 milliseconds per day. Perhaps we could use Linux
(and kvmclock's) timekeeping parameters to derive the tsc_scale
and tsc_offset?
We probably need to check the code. But could we just work around it with a host periodic timer (period ~10 ms) that issues a refresh of the guest TSC page (by scheduling a vcpu KVM_REQ_*), to prevent drifting?
config/config-x86-common.mak | 2 +
config/config-x86_64.mak | 1 +
x86/hyperv.h | 9 ++
x86/hyperv_clock.c | 193 +++++++++++++++++++++++++++++++++++++++++++
x86/unittests.cfg | 5 ++
5 files changed, 210 insertions(+)
create mode 100644 x86/hyperv_clock.c
diff --git a/config/config-x86-common.mak b/config/config-x86-common.mak
index 72b95e3..621413b 100644
--- a/config/config-x86-common.mak
+++ b/config/config-x86-common.mak
@@ -119,6 +119,8 @@ $(TEST_DIR)/hyperv_synic.elf: $(cstart.o) $(TEST_DIR)/hyperv.o \
$(TEST_DIR)/hyperv_stimer.elf: $(cstart.o) $(TEST_DIR)/hyperv.o \
$(TEST_DIR)/hyperv_stimer.o
+$(TEST_DIR)/hyperv_clock.elf: $(cstart.o) $(TEST_DIR)/hyperv.o
+
arch_clean:
$(RM) $(TEST_DIR)/*.o $(TEST_DIR)/*.flat $(TEST_DIR)/*.elf \
$(TEST_DIR)/.*.d lib/x86/.*.d
diff --git a/config/config-x86_64.mak b/config/config-x86_64.mak
index 1764701..634d09d 100644
--- a/config/config-x86_64.mak
+++ b/config/config-x86_64.mak
@@ -12,5 +12,6 @@ tests = $(TEST_DIR)/access.flat $(TEST_DIR)/apic.flat \
tests += $(TEST_DIR)/svm.flat
tests += $(TEST_DIR)/vmx.flat
tests += $(TEST_DIR)/tscdeadline_latency.flat
+tests += $(TEST_DIR)/hyperv_clock.flat
include config/config-x86-common.mak
diff --git a/x86/hyperv.h b/x86/hyperv.h
index faf931b..974df56 100644
--- a/x86/hyperv.h
+++ b/x86/hyperv.h
@@ -12,6 +12,7 @@
#define HV_X64_MSR_SYNTIMER_AVAILABLE (1 << 3)
#define HV_X64_MSR_TIME_REF_COUNT 0x40000020
+#define HV_X64_MSR_REFERENCE_TSC 0x40000021
/* Define synthetic interrupt controller model specific registers. */
#define HV_X64_MSR_SCONTROL 0x40000080
@@ -180,4 +181,12 @@ void synic_sint_create(int vcpu, int sint, int vec, bool auto_eoi);
void synic_sint_set(int vcpu, int sint);
void synic_sint_destroy(int vcpu, int sint);
+/*
+ * Guest-visible layout of the hyperv reference TSC page, registered
+ * through HV_X64_MSR_REFERENCE_TSC.  Readers snapshot the page and
+ * retry while tsc_sequence changes (seqlock-style); see the test's
+ * hvclock_get_time_values().
+ */
+struct hv_reference_tsc_page {
+ uint32_t tsc_sequence; /* bumped by the hypervisor on update */
+ uint32_t res1;
+ uint64_t tsc_scale; /* 0.64 fixed-point multiplier, TSC -> ticks */
+ int64_t tsc_offset; /* signed offset added after scaling */
+};
+
+
#endif
diff --git a/x86/hyperv_clock.c b/x86/hyperv_clock.c
new file mode 100644
index 0000000..680ba7c
--- /dev/null
+++ b/x86/hyperv_clock.c
@@ -0,0 +1,193 @@
+#include "libcflat.h"
+#include "smp.h"
+#include "atomic.h"
+#include "processor.h"
+#include "hyperv.h"
+#include "vm.h"
+
+/* Upper bound on the number of vcpus the test exercises. */
+#define MAX_CPU 4
+/* The hyperv time reference counter ticks once per 100 ns. */
+#define TICKS_PER_SEC (1000000000 / 100)
+
+/* Page registered via HV_X64_MSR_REFERENCE_TSC in main(). */
+struct hv_reference_tsc_page *hv_clock;
+
+/*
+ * Multiply a 64-bit TSC delta by a 64-bit fixed-point fraction
+ * (0.64 format) and return the high 64 bits of the 128-bit product,
+ * i.e. (delta * mul_frac) >> 64.  This is the hyperv reference-TSC
+ * scaling step.
+ */
+static inline u64 scale_delta(u64 delta, u64 mul_frac)
+{
+ u64 product, unused;
+
+ /* one-operand MUL: RDX:RAX = RAX * mul_frac; keep RDX (high half) */
+ __asm__ (
+ "mul %3"
+ : "=d" (product), "=a" (unused) : "1" (delta), "rm" ((u64)mul_frac) );
+
+ return product;
+}
+
+/*
+ * Convert a raw TSC reading to 100ns reference ticks using the scale
+ * and offset snapshotted in *shadow:
+ *   ticks = ((tsc * tsc_scale) >> 64) + tsc_offset
+ */
+static u64 hvclock_tsc_to_ticks(struct hv_reference_tsc_page *shadow, uint64_t tsc)
+{
+ u64 delta = tsc;
+ return scale_delta(delta, shadow->tsc_scale) + shadow->tsc_offset;
+}
+
+/*
+ * Reads a consistent set of time-base values from hypervisor,
+ * into a shadow data area.  Seqlock-style: copy the whole page,
+ * then retry if the hypervisor bumped tsc_sequence mid-copy.
+ */
+static void hvclock_get_time_values(struct hv_reference_tsc_page *shadow,
+ struct hv_reference_tsc_page *page)
+{
+ int seq;
+ do {
+ seq = page->tsc_sequence;
+ rmb(); /* fetch version before data */
+ *shadow = *page;
+ rmb(); /* test version after fetching data */
+ } while (shadow->tsc_sequence != seq);
+}
+
+/*
+ * Return the current time in 100ns reference ticks, computed from a
+ * consistent snapshot of the TSC page plus a fresh RDTSC.
+ */
+uint64_t hv_clock_read(void)
+{
+ struct hv_reference_tsc_page shadow;
+
+ hvclock_get_time_values(&shadow, hv_clock);
+ return hvclock_tsc_to_ticks(&shadow, rdtsc());
+}
+
+atomic_t cpus_left; /* countdown: vcpus still running the current test */
+bool ok[MAX_CPU]; /* per-vcpu pass/fail for check_test() */
+uint64_t loops[MAX_CPU]; /* per-vcpu iteration counts for perf_test() */
+
+/*
+ * Per-vcpu worker for check_test(): spin for ~3 seconds reading the
+ * TSC-page clock, verifying it never goes backwards, then compare the
+ * final value against the HV_X64_MSR_TIME_REF_COUNT MSR (tolerating a
+ * drift of less than 5 ticks, i.e. 500 ns).
+ */
+static void hv_clock_test(void *data)
+{
+ int i = smp_id();
+ uint64_t t = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+ uint64_t end = t + 3 * TICKS_PER_SEC;
+
+ ok[i] = true;
+ do {
+ uint64_t now = hv_clock_read();
+ /* monotonicity check: the clock must never warp backwards */
+ if (now < t) {
+ printf("warp on CPU %d!\n", smp_id());
+ ok[i] = false;
+ break;
+ }
+ t = now;
+ } while(t < end);
+
+ barrier();
+ /* only reached without the warp break: t >= end means a clean run */
+ if (t >= end) {
+ int64_t ref = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+ if (i == 0)
+ printf("Time reference MSR drift: %lld\n\n", ref - end);
+ ok[i] &= (ref - end) > -5 && (ref - end) < 5;
+ }
+
+ atomic_dec(&cpus_left);
+}
+
+/*
+ * Run hv_clock_test() on every vcpu, wait for all of them to finish,
+ * and report pass only if every vcpu saw a monotonic, accurate clock.
+ */
+static void check_test(int ncpus)
+{
+ int i;
+ bool pass;
+
+ atomic_set(&cpus_left, ncpus);
+ for (i = ncpus - 1; i >= 0; i--)
+ on_cpu_async(i, hv_clock_test, NULL);
+
+ /* Wait for the end of other vcpu */
+ while(atomic_read(&cpus_left))
+ ;
+
+ pass = true;
+ for (i = ncpus - 1; i >= 0; i--)
+ pass &= ok[i];
+
+ report("TSC reference precision test", pass);
+}
+
+/*
+ * Per-vcpu worker for perf_test(): count how many hv_clock_read()
+ * calls complete in one second of reference-clock time.
+ */
+static void hv_perf_test(void *data)
+{
+ uint64_t t = hv_clock_read();
+ uint64_t end = t + 1 * TICKS_PER_SEC; /* use the macro, not a magic copy */
+ uint64_t local_loops = 0;
+
+ do {
+ t = hv_clock_read();
+ local_loops++;
+ } while(t < end);
+
+ loops[smp_id()] = local_loops;
+ atomic_dec(&cpus_left);
+}
+
+/*
+ * Run hv_perf_test() on every vcpu and print the average number of
+ * hv_clock_read() iterations per second per vcpu.
+ */
+static void perf_test(int ncpus)
+{
+ int i;
+ uint64_t total_loops;
+
+ atomic_set(&cpus_left, ncpus);
+ for (i = ncpus - 1; i >= 0; i--)
+ on_cpu_async(i, hv_perf_test, NULL);
+
+ /* Wait for the end of other vcpu */
+ while(atomic_read(&cpus_left))
+ ;
+
+ total_loops = 0;
+ for (i = ncpus - 1; i >= 0; i--)
+ total_loops += loops[i];
+ /* NOTE(review): %lld with an unsigned value; %llu would match the type */
+ printf("iterations/sec: %lld\n", total_loops / ncpus);
+}
+
+/*
+ * Entry point: enable the reference TSC page, sanity-check it against
+ * the time reference counter MSR over a 2 second window, then run the
+ * multi-vcpu precision and performance tests and disable the page.
+ */
+int main(int ac, char **av)
+{
+ /* NOTE(review): nerr is never incremented, so report() failures do
+ not affect the exit code — looks like dead state; verify intent. */
+ int nerr = 0;
+ int ncpus;
+ struct hv_reference_tsc_page shadow;
+ uint64_t tsc1, t1, tsc2, t2;
+ uint64_t ref1, ref2;
+
+ setup_vm();
+ smp_init();
+
+ /* bit 0 of the MSR value is the enable bit; upper bits are the GPA */
+ hv_clock = alloc_page();
+ wrmsr(HV_X64_MSR_REFERENCE_TSC, (u64)(uintptr_t)hv_clock | 1);
+ report("MSR value after enabling",
+ rdmsr(HV_X64_MSR_REFERENCE_TSC) == ((u64)(uintptr_t)hv_clock | 1));
+
+ /* sequence 0 / 0xFFFFFFFF: hypervisor did not populate the page */
+ hvclock_get_time_values(&shadow, hv_clock);
+ if (shadow.tsc_sequence == 0 || shadow.tsc_sequence == 0xFFFFFFFF) {
+ printf("Reference TSC page not available\n");
+ exit(1);
+ }
+
+ printf("scale: %llx offset: %lld\n", shadow.tsc_scale, shadow.tsc_offset);
+ ref1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+ tsc1 = rdtsc();
+ t1 = hvclock_tsc_to_ticks(&shadow, tsc1);
+ printf("refcnt %lld, TSC %llx, TSC reference %lld\n",
+ ref1, tsc1, t1);
+
+ /* busy-wait 2 seconds on the reference counter MSR */
+ do
+ ref2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+ while (ref2 < ref1 + 2 * TICKS_PER_SEC);
+
+ /* second sample: the deltas show the page-vs-MSR drift over 2 s */
+ tsc2 = rdtsc();
+ t2 = hvclock_tsc_to_ticks(&shadow, tsc2);
+ printf("refcnt %lld (delta %lld), TSC %llx, TSC reference %lld (delta %lld)\n",
+ ref2, ref2 - ref1, tsc2, t2, t2 - t1);
+
+ ncpus = cpu_count();
+ if (ncpus > MAX_CPU)
+ ncpus = MAX_CPU;
+
+ check_test(ncpus);
+ perf_test(ncpus);
+
+ /* clearing the enable bit disables the reference TSC page */
+ wrmsr(HV_X64_MSR_REFERENCE_TSC, 0LL);
+ report("MSR value after disabling", rdmsr(HV_X64_MSR_REFERENCE_TSC) == 0);
+
+ return nerr > 0 ? 1 : 0;
+}
diff --git a/x86/unittests.cfg b/x86/unittests.cfg
index 99eff26..e981c00 100644
--- a/x86/unittests.cfg
+++ b/x86/unittests.cfg
@@ -188,3 +188,8 @@ extra_params = -cpu kvm64,hv_synic -device hyperv-testdev
file = hyperv_stimer.flat
smp = 2
extra_params = -cpu kvm64,hv_time,hv_synic,hv_stimer -device hyperv-testdev
+
+[hyperv_clock]
+file = hyperv_clock.flat
+smp = 2
+extra_params = -cpu kvm64,hv_time
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html