On 23/08/2017 18:02, Paolo Bonzini wrote: > > More duct tape would have been just: > > - if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC) > + mode = READ_ONCE(pvclock_gtod_data.clock.vclock_mode); > + if (mode != VCLOCK_TSC && > + (mode != VCLOCK_PVCLOCK || !pvclock_nested_virt_magic()) > return false; > > - return do_realtime(ts, cycle_now) == VCLOCK_TSC; > + switch (mode) { > + case VCLOCK_TSC: > + return do_realtime_tsc(ts, cycle_now); > + case VCLOCK_PVCLOCK: > + return do_realtime_pvclock(ts, cycle_now); > + } > > Nested virtualization does need a clocksource change notifier on top, > but we can cross that bridge later. Maybe Denis can post just those > patches to begin with. For what it's worth, this is all that's needed (with patches 1-2-3-4-5-7) to support kvmclock on top of Hyper-V clock. It's trivial. Even if we could add paravirtualization magic to KVM live migration, we certainly couldn't do that for other hypervisors. diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 5b882cc0c0e9..3bab935b021a 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -46,10 +46,24 @@ static u64 read_hv_clock_tsc(struct clocksource *arg) return current_tick; } +static bool read_hv_clock_tsc_with_stamp(struct clocksource *arg, + u64 *cycles, u64 *cycles_stamp) +{ + *cycles = __hv_read_tsc_page(tsc_pg, &cycles_stamp); + + if (*cycles == U64_MAX) { + *cycles = rdmsrl(HV_X64_MSR_TIME_REF_COUNT); + return false; + } + + return true; +} + static struct clocksource hyperv_cs_tsc = { .name = "hyperv_clocksource_tsc_page", .rating = 400, .read = read_hv_clock_tsc, + .read_with_stamp = read_hv_clock_tsc_with_stamp, .mask = CLOCKSOURCE_MASK(64), .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 2b58c8c1eeaa..5aff66e9fff7 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -176,9 +176,9 @@ void hyperv_cleanup(void); #endif #ifdef CONFIG_HYPERV_TSCPAGE struct ms_hyperv_tsc_page *hv_get_tsc_page(void); -static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg) +static inline u64 __hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg, u64 *cur_tsc) { - u64 scale, offset, cur_tsc; + u64 scale, offset; u32 sequence; /* @@ -209,7 +209,7 @@ static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg) scale = READ_ONCE(tsc_pg->tsc_scale); offset = READ_ONCE(tsc_pg->tsc_offset); - cur_tsc = rdtsc_ordered(); + *cur_tsc = rdtsc_ordered(); /* * Make sure we read sequence after we read all other values @@ -219,9 +219,14 @@ static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg) } while (READ_ONCE(tsc_pg->tsc_sequence) != sequence); - return mul_u64_u64_shr(cur_tsc, scale, 64) + offset; + return mul_u64_u64_shr(*cur_tsc, scale, 64) + offset; } +static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg) +{ + u64 cur_tsc; + return __hv_read_tsc_page(tsc_pg, &cur_tsc); +} #else static inline struct ms_hyperv_tsc_page *hv_get_tsc_page(void) { Denis, could you try redoing patch 7 to use the pvclock_gtod_notifier instead of the new one you're adding, and only send that first part? I think it's a worthwhile cleanup anyway, so let's start with that. Paolo