Since KVM has been switched to getting masterclock-related data directly from the timekeeper by the previous patches, we are now able to remove all the parts related to the old scheme of getting masterclock data. This patch removes those parts. Signed-off-by: Denis Plotnikov <dplotnikov@xxxxxxxxxxxxx> --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/trace.h | 31 ++---- arch/x86/kvm/x86.c | 216 ++++++---------------------------------- 3 files changed, 42 insertions(+), 207 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 87ac4fb..91465db 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -791,7 +791,7 @@ struct kvm_arch { u64 cur_tsc_generation; int nr_vcpus_matched_tsc; - spinlock_t pvclock_gtod_sync_lock; + spinlock_t masterclock_lock; bool use_master_clock; u64 master_kernel_ns; u64 master_cycle_now; diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 0a6cc67..923ab31 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -807,45 +807,39 @@ TRACE_EVENT(kvm_write_tsc_offset, #ifdef CONFIG_X86_64 -#define host_clocks \ - {VCLOCK_NONE, "none"}, \ - {VCLOCK_TSC, "tsc"} \ TRACE_EVENT(kvm_update_master_clock, - TP_PROTO(bool use_master_clock, unsigned int host_clock, bool offset_matched), - TP_ARGS(use_master_clock, host_clock, offset_matched), + TP_PROTO(bool use_master_clock, bool host_clock_stable, + bool offset_matched), + TP_ARGS(use_master_clock, host_clock_stable, offset_matched), TP_STRUCT__entry( __field( bool, use_master_clock ) - __field( unsigned int, host_clock ) + __field( bool, host_clock_stable ) __field( bool, offset_matched ) ), TP_fast_assign( __entry->use_master_clock = use_master_clock; - __entry->host_clock = host_clock; + __entry->host_clock_stable = host_clock_stable; __entry->offset_matched = offset_matched; ), - TP_printk("masterclock %d hostclock %s offsetmatched %u", + TP_printk("masterclock %d hostclock stable %u offsetmatched %u", 
__entry->use_master_clock, - __print_symbolic(__entry->host_clock, host_clocks), + __entry->host_clock_stable, __entry->offset_matched) ); TRACE_EVENT(kvm_track_tsc, TP_PROTO(unsigned int vcpu_id, unsigned int nr_matched, - unsigned int online_vcpus, bool use_master_clock, - unsigned int host_clock), - TP_ARGS(vcpu_id, nr_matched, online_vcpus, use_master_clock, - host_clock), + unsigned int online_vcpus, bool use_master_clock), + TP_ARGS(vcpu_id, nr_matched, online_vcpus, use_master_clock), TP_STRUCT__entry( __field( unsigned int, vcpu_id ) __field( unsigned int, nr_vcpus_matched_tsc ) __field( unsigned int, online_vcpus ) __field( bool, use_master_clock ) - __field( unsigned int, host_clock ) ), TP_fast_assign( @@ -853,14 +847,11 @@ TRACE_EVENT(kvm_track_tsc, __entry->nr_vcpus_matched_tsc = nr_matched; __entry->online_vcpus = online_vcpus; __entry->use_master_clock = use_master_clock; - __entry->host_clock = host_clock; ), - TP_printk("vcpu_id %u masterclock %u offsetmatched %u nr_online %u" - " hostclock %s", + TP_printk("vcpu_id %u masterclock %u offsetmatched %u nr_online %u", __entry->vcpu_id, __entry->use_master_clock, - __entry->nr_vcpus_matched_tsc, __entry->online_vcpus, - __print_symbolic(__entry->host_clock, host_clocks)) + __entry->nr_vcpus_matched_tsc, __entry->online_vcpus) ); #endif /* CONFIG_X86_64 */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 496e731..7d232ad 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -50,7 +50,7 @@ #include <linux/hash.h> #include <linux/pci.h> #include <linux/timekeeper_internal.h> -#include <linux/pvclock_gtod.h> +#include <linux/cs_notifier.h> #include <linux/kvm_irqfd.h> #include <linux/irqbypass.h> #include <linux/sched/stat.h> @@ -1134,50 +1134,6 @@ static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) return kvm_set_msr(vcpu, &msr); } -#ifdef CONFIG_X86_64 -struct pvclock_gtod_data { - seqcount_t seq; - - struct { /* extract of a clocksource struct */ - int vclock_mode; - 
u64 cycle_last; - u64 mask; - u32 mult; - u32 shift; - } clock; - - u64 boot_ns; - u64 nsec_base; - u64 wall_time_sec; -}; - -static struct pvclock_gtod_data pvclock_gtod_data; - -static void update_pvclock_gtod(struct timekeeper *tk) -{ - struct pvclock_gtod_data *vdata = &pvclock_gtod_data; - u64 boot_ns; - - boot_ns = ktime_to_ns(ktime_add(tk->tkr_mono.base, tk->offs_boot)); - - write_seqcount_begin(&vdata->seq); - - /* copy pvclock gtod data */ - vdata->clock.vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode; - vdata->clock.cycle_last = tk->tkr_mono.cycle_last; - vdata->clock.mask = tk->tkr_mono.mask; - vdata->clock.mult = tk->tkr_mono.mult; - vdata->clock.shift = tk->tkr_mono.shift; - - vdata->boot_ns = boot_ns; - vdata->nsec_base = tk->tkr_mono.xtime_nsec; - - vdata->wall_time_sec = tk->xtime_sec; - - write_seqcount_end(&vdata->seq); -} -#endif - void kvm_set_pending_timer(struct kvm_vcpu *vcpu) { /* @@ -1269,10 +1225,6 @@ static void kvm_get_time_scale(uint64_t scaled_hz, uint64_t base_hz, __func__, base_hz, scaled_hz, shift, *pmultiplier); } -#ifdef CONFIG_X86_64 -static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0); -#endif - static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz); static unsigned long max_tsc_khz; @@ -1366,7 +1318,6 @@ static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu) #ifdef CONFIG_X86_64 bool vcpus_matched; struct kvm_arch *ka = &vcpu->kvm->arch; - struct pvclock_gtod_data *gtod = &pvclock_gtod_data; vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 == atomic_read(&vcpu->kvm->online_vcpus)); @@ -1379,13 +1330,12 @@ static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu) * and the vcpus need to have matched TSCs. When that happens, * perform request to enable masterclock. 
*/ - if (ka->use_master_clock || - (gtod->clock.vclock_mode == VCLOCK_TSC && vcpus_matched)) + if (ka->use_master_clock || vcpus_matched) kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu); trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc, - atomic_read(&vcpu->kvm->online_vcpus), - ka->use_master_clock, gtod->clock.vclock_mode); + atomic_read(&vcpu->kvm->online_vcpus), + ka->use_master_clock); #endif } @@ -1538,7 +1488,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) kvm_vcpu_write_tsc_offset(vcpu, offset); raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags); - spin_lock(&kvm->arch.pvclock_gtod_sync_lock); + spin_lock(&kvm->arch.masterclock_lock); if (!matched) { kvm->arch.nr_vcpus_matched_tsc = 0; } else if (!already_matched) { @@ -1546,9 +1496,8 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) } kvm_track_tsc_matching(vcpu); - spin_unlock(&kvm->arch.pvclock_gtod_sync_lock); + spin_unlock(&kvm->arch.masterclock_lock); } - EXPORT_SYMBOL_GPL(kvm_write_tsc); static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, @@ -1567,79 +1516,6 @@ static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment) #ifdef CONFIG_X86_64 -static u64 read_tsc(void) -{ - u64 ret = (u64)rdtsc_ordered(); - u64 last = pvclock_gtod_data.clock.cycle_last; - - if (likely(ret >= last)) - return ret; - - /* - * GCC likes to generate cmov here, but this branch is extremely - * predictable (it's just a function of time and the likely is - * very likely) and there's a data dependence, so force GCC - * to generate a branch instead. I don't barrier() because - * we don't actually need a barrier, and if this function - * ever gets inlined it will generate worse code. 
- */ - asm volatile (""); - return last; -} - -static inline u64 vgettsc(u64 *cycle_now) -{ - long v; - struct pvclock_gtod_data *gtod = &pvclock_gtod_data; - - *cycle_now = read_tsc(); - - v = (*cycle_now - gtod->clock.cycle_last) & gtod->clock.mask; - return v * gtod->clock.mult; -} - -static int do_monotonic_boot(s64 *t, u64 *cycle_now) -{ - struct pvclock_gtod_data *gtod = &pvclock_gtod_data; - unsigned long seq; - int mode; - u64 ns; - - do { - seq = read_seqcount_begin(&gtod->seq); - mode = gtod->clock.vclock_mode; - ns = gtod->nsec_base; - ns += vgettsc(cycle_now); - ns >>= gtod->clock.shift; - ns += gtod->boot_ns; - } while (unlikely(read_seqcount_retry(&gtod->seq, seq))); - *t = ns; - - return mode; -} - -static int do_realtime(struct timespec *ts, u64 *cycle_now) -{ - struct pvclock_gtod_data *gtod = &pvclock_gtod_data; - unsigned long seq; - int mode; - u64 ns; - - do { - seq = read_seqcount_begin(&gtod->seq); - mode = gtod->clock.vclock_mode; - ts->tv_sec = gtod->wall_time_sec; - ns = gtod->nsec_base; - ns += vgettsc(cycle_now); - ns >>= gtod->clock.shift; - } while (unlikely(read_seqcount_retry(&gtod->seq, seq))); - - ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); - ts->tv_nsec = ns; - - return mode; -} - /* returns true if host is using tsc clocksource */ static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *cycle_now) { @@ -1713,34 +1589,28 @@ static bool kvm_get_walltime_and_clockread(struct timespec *ts, * */ -static void pvclock_update_vm_gtod_copy(struct kvm *kvm) +static void update_masterclock(struct kvm *kvm) { #ifdef CONFIG_X86_64 struct kvm_arch *ka = &kvm->arch; - int vclock_mode; - bool host_tsc_clocksource, vcpus_matched; + bool host_clocksource_stable, vcpus_matched; vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 == atomic_read(&kvm->online_vcpus)); /* - * If the host uses TSC clock, then passthrough TSC as stable - * to the guest. 
+ * kvm_get_time_and_clockread returns true if clocksource is stable */ - host_tsc_clocksource = kvm_get_time_and_clockread( + host_clocksource_stable = kvm_get_time_and_clockread( &ka->master_kernel_ns, &ka->master_cycle_now); - ka->use_master_clock = host_tsc_clocksource && vcpus_matched + ka->use_master_clock = host_clocksource_stable && vcpus_matched && !ka->backwards_tsc_observed && !ka->boot_vcpu_runs_old_kvmclock; - if (ka->use_master_clock) - atomic_set(&kvm_guest_has_master_clock, 1); - - vclock_mode = pvclock_gtod_data.clock.vclock_mode; - trace_kvm_update_master_clock(ka->use_master_clock, vclock_mode, - vcpus_matched); + trace_kvm_update_master_clock(ka->use_master_clock, + host_clocksource_stable, vcpus_matched); #endif } @@ -1756,10 +1626,10 @@ static void kvm_gen_update_masterclock(struct kvm *kvm) struct kvm_vcpu *vcpu; struct kvm_arch *ka = &kvm->arch; - spin_lock(&ka->pvclock_gtod_sync_lock); + spin_lock(&ka->masterclock_lock); kvm_make_mclock_inprogress_request(kvm); /* no guest entries from this point */ - pvclock_update_vm_gtod_copy(kvm); + update_masterclock(kvm); kvm_for_each_vcpu(i, vcpu, kvm) kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); @@ -1768,7 +1638,7 @@ static void kvm_gen_update_masterclock(struct kvm *kvm) kvm_for_each_vcpu(i, vcpu, kvm) kvm_clear_request(KVM_REQ_MCLOCK_INPROGRESS, vcpu); - spin_unlock(&ka->pvclock_gtod_sync_lock); + spin_unlock(&ka->masterclock_lock); #endif } @@ -1778,15 +1648,15 @@ u64 get_kvmclock_ns(struct kvm *kvm) struct pvclock_vcpu_time_info hv_clock; u64 ret; - spin_lock(&ka->pvclock_gtod_sync_lock); + spin_lock(&ka->masterclock_lock); if (!ka->use_master_clock) { - spin_unlock(&ka->pvclock_gtod_sync_lock); + spin_unlock(&ka->masterclock_lock); return ktime_get_boot_ns() + ka->kvmclock_offset; } hv_clock.tsc_timestamp = ka->master_cycle_now; hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset; - spin_unlock(&ka->pvclock_gtod_sync_lock); + spin_unlock(&ka->masterclock_lock); /* both 
__this_cpu_read() and rdtsc() should be on the same cpu */ get_cpu(); @@ -1872,13 +1742,13 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) * If the host uses TSC clock, then passthrough TSC as stable * to the guest. */ - spin_lock(&ka->pvclock_gtod_sync_lock); + spin_lock(&ka->masterclock_lock); use_master_clock = ka->use_master_clock; if (use_master_clock) { host_tsc = ka->master_cycle_now; kernel_ns = ka->master_kernel_ns; } - spin_unlock(&ka->pvclock_gtod_sync_lock); + spin_unlock(&ka->masterclock_lock); /* Keep irq disabled to prevent changes to the clock */ local_irq_save(flags); @@ -4208,11 +4078,7 @@ long kvm_arch_vm_ioctl(struct file *filp, goto out; r = 0; - /* - * TODO: userspace has to take care of races with VCPU_RUN, so - * kvm_gen_update_masterclock() can be cut down to locked - * pvclock_update_vm_gtod_copy(). - */ + kvm_gen_update_masterclock(kvm); now_ns = get_kvmclock_ns(kvm); kvm->arch.kvmclock_offset += user_ns.clock - now_ns; @@ -6041,7 +5907,8 @@ static void kvm_set_mmio_spte_mask(void) } #ifdef CONFIG_X86_64 -static void pvclock_gtod_update_fn(struct work_struct *work) +static int process_clocksource_change(struct notifier_block *nb, + unsigned long unused0, void *unused1) { struct kvm *kvm; @@ -6052,35 +5919,12 @@ static void pvclock_gtod_update_fn(struct work_struct *work) list_for_each_entry(kvm, &vm_list, vm_list) kvm_for_each_vcpu(i, vcpu, kvm) kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu); - atomic_set(&kvm_guest_has_master_clock, 0); spin_unlock(&kvm_lock); -} - -static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn); - -/* - * Notification about pvclock gtod data update. 
- */ -static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused, - void *priv) -{ - struct pvclock_gtod_data *gtod = &pvclock_gtod_data; - struct timekeeper *tk = priv; - - update_pvclock_gtod(tk); - - /* disable master clock if host does not trust, or does not - * use, TSC clocksource - */ - if (gtod->clock.vclock_mode != VCLOCK_TSC && - atomic_read(&kvm_guest_has_master_clock) != 0) - queue_work(system_long_wq, &pvclock_gtod_work); - return 0; } -static struct notifier_block pvclock_gtod_notifier = { - .notifier_call = pvclock_gtod_notify, +static struct notifier_block clocksource_change_notifier = { + .notifier_call = process_clocksource_change, }; #endif @@ -6133,7 +5977,7 @@ int kvm_arch_init(void *opaque) kvm_lapic_init(); #ifdef CONFIG_X86_64 - pvclock_gtod_register_notifier(&pvclock_gtod_notifier); + clocksource_changes_register_notifier(&clocksource_change_notifier); #endif return 0; @@ -6154,7 +5998,7 @@ void kvm_arch_exit(void) CPUFREQ_TRANSITION_NOTIFIER); cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE); #ifdef CONFIG_X86_64 - pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier); + clocksource_changes_unregister_notifier(&clocksource_change_notifier); #endif kvm_x86_ops = NULL; kvm_mmu_module_exit(); @@ -8056,10 +7900,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) raw_spin_lock_init(&kvm->arch.tsc_write_lock); mutex_init(&kvm->arch.apic_map_lock); mutex_init(&kvm->arch.hyperv.hv_lock); - spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock); + spin_lock_init(&kvm->arch.masterclock_lock); kvm->arch.kvmclock_offset = -ktime_get_boot_ns(); - pvclock_update_vm_gtod_copy(kvm); + update_masterclock(kvm); INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn); INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn); -- 2.7.4