On 23/08/2017 18:02, Paolo Bonzini wrote:
More duct tape would have been just:
- if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC)
+ mode = READ_ONCE(pvclock_gtod_data.clock.vclock_mode);
+ if (mode != VCLOCK_TSC &&
+ (mode != VCLOCK_PVCLOCK || !pvclock_nested_virt_magic()))
return false;
- return do_realtime(ts, cycle_now) == VCLOCK_TSC;
+ switch (mode) {
+ case VCLOCK_TSC:
+ return do_realtime_tsc(ts, cycle_now);
+ case VCLOCK_PVCLOCK:
+ return do_realtime_pvclock(ts, cycle_now);
+ }
Nested virtualization does need a clocksource change notifier on top,
but we can cross that bridge later. Maybe Denis can post just those
patches to begin with.
For what it's worth, this is all that's needed (with patches 1-2-3-4-5-7)
to support kvmclock on top of Hyper-V clock. It's trivial.
Even if we could add paravirtualization magic to KVM live migration, we
certainly couldn't do that for other hypervisors.
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 5b882cc0c0e9..3bab935b021a 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -46,10 +46,24 @@ static u64 read_hv_clock_tsc(struct clocksource *arg)
return current_tick;
}
+static bool read_hv_clock_tsc_with_stamp(struct clocksource *arg,
+ u64 *cycles, u64 *cycles_stamp)
+{
+ /* Reference time goes to *cycles; the raw TSC read goes to *cycles_stamp. */
+ *cycles = __hv_read_tsc_page(tsc_pg, cycles_stamp);
+ if (*cycles == U64_MAX) {
+ /* U64_MAX (presumably: TSC page unusable) - read the MSR, return false. */
+ rdmsrl(HV_X64_MSR_TIME_REF_COUNT, *cycles);
+ return false;
+ }
+ return true;
+}
+
static struct clocksource hyperv_cs_tsc = {
.name = "hyperv_clocksource_tsc_page",
.rating = 400,
.read = read_hv_clock_tsc,
+ .read_with_stamp = read_hv_clock_tsc_with_stamp, /* read that also hands back the raw TSC value */
.mask = CLOCKSOURCE_MASK(64),
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 2b58c8c1eeaa..5aff66e9fff7 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -176,9 +176,9 @@ void hyperv_cleanup(void);
#endif
#ifdef CONFIG_HYPERV_TSCPAGE
struct ms_hyperv_tsc_page *hv_get_tsc_page(void);
-static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg)
+static inline u64 __hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg, u64 *cur_tsc)
{
- u64 scale, offset, cur_tsc;
+ u64 scale, offset;
u32 sequence;
/*
@@ -209,7 +209,7 @@ static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg)
scale = READ_ONCE(tsc_pg->tsc_scale);
offset = READ_ONCE(tsc_pg->tsc_offset);
- cur_tsc = rdtsc_ordered();
+ *cur_tsc = rdtsc_ordered();
/*
* Make sure we read sequence after we read all other values
@@ -219,9 +219,14 @@ static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg)
} while (READ_ONCE(tsc_pg->tsc_sequence) != sequence);
- return mul_u64_u64_shr(cur_tsc, scale, 64) + offset;
+ return mul_u64_u64_shr(*cur_tsc, scale, 64) + offset;
}
+static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg)
+{
+ u64 raw_tsc; /* filled in by the helper; this caller has no use for it */
+ return __hv_read_tsc_page(tsc_pg, &raw_tsc);
+}
#else
static inline struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
{
Denis, could you try redoing patch 7 to use the pvclock_gtod_notifier
instead of the new one you're adding, and only send that first part? I
think it's a worthwhile cleanup anyway, so let's start with that.
Paolo