On Thu, Sep 01, 2016 at 05:26:12PM +0200, Paolo Bonzini wrote: > We will use it in the next patches for KVM_GET_CLOCK and as a basis for the > contents of the Hyper-V TSC page. Get the values from the Linux > timekeeper even if kvmclock is not enabled. > > Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx> > --- > arch/x86/kvm/x86.c | 109 +++++++++++++++++++++++++++++------------------------ > 1 file changed, 59 insertions(+), 50 deletions(-) > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index 19f9f9e05c2a..65974dd0565f 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -1716,6 +1716,60 @@ static void kvm_gen_update_masterclock(struct kvm *kvm) > #endif > } > > +static void kvm_setup_pvclock_page(struct kvm_vcpu *v) > +{ > + struct kvm_vcpu_arch *vcpu = &v->arch; > + struct pvclock_vcpu_time_info guest_hv_clock; > + > + if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time, > + &guest_hv_clock, sizeof(guest_hv_clock)))) > + return; > + > + /* This VCPU is paused, but it's legal for a guest to read another > + * VCPU's kvmclock, so we really have to follow the specification where > + * it says that version is odd if data is being modified, and even after > + * it is consistent. > + * > + * Version field updates must be kept separate. This is because > + * kvm_write_guest_cached might use a "rep movs" instruction, and > + * writes within a string instruction are weakly ordered. So there > + * are three writes overall. > + * > + * As a small optimization, only write the version field in the first > + * and third write. The vcpu->pv_time cache is still valid, because the > + * version field is the first in the struct. > + */ > + BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0); > + > + vcpu->hv_clock.version = guest_hv_clock.version + 1; > + kvm_write_guest_cached(v->kvm, &vcpu->pv_time, > + &vcpu->hv_clock, > + sizeof(vcpu->hv_clock.version)); > + > + smp_wmb(); > + > + /* retain PVCLOCK_GUEST_STOPPED if set in guest copy */ > + vcpu->hv_clock.flags |= (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED); > + > + if (vcpu->pvclock_set_guest_stopped_request) { > + vcpu->hv_clock.flags |= PVCLOCK_GUEST_STOPPED; > + vcpu->pvclock_set_guest_stopped_request = false; > + } > + > + trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock); > + > + kvm_write_guest_cached(v->kvm, &vcpu->pv_time, > + &vcpu->hv_clock, > + sizeof(vcpu->hv_clock)); > + > + smp_wmb(); > + > + vcpu->hv_clock.version++; > + kvm_write_guest_cached(v->kvm, &vcpu->pv_time, > + &vcpu->hv_clock, > + sizeof(vcpu->hv_clock.version)); > +} > + > static int kvm_guest_time_update(struct kvm_vcpu *v) > { > unsigned long flags, tgt_tsc_khz; > @@ -1723,7 +1777,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) > struct kvm_arch *ka = &v->kvm->arch; > s64 kernel_ns; > u64 tsc_timestamp, host_tsc; > - struct pvclock_vcpu_time_info guest_hv_clock; > u8 pvclock_flags; > bool use_master_clock; > > @@ -1777,8 +1830,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) > > local_irq_restore(flags); > > - if (!vcpu->pv_time_enabled) > - return 0; Strictly speaking, you only need .hv_clock updated if either kvmclock or tsc_ref_page is enabled, so you may want to still skip the calculations otherwise. > + /* With all the info we got, fill in the values */ > > if (kvm_has_tsc_control) > tgt_tsc_khz = kvm_scale_tsc(v, tgt_tsc_khz); > @@ -1790,64 +1842,21 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) > vcpu->hw_tsc_khz = tgt_tsc_khz; > } > > - /* With all the info we got, fill in the values */ > vcpu->hv_clock.tsc_timestamp = tsc_timestamp; > vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset; > vcpu->last_guest_tsc = tsc_timestamp; > > - if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time, > - &guest_hv_clock, sizeof(guest_hv_clock)))) > - return 0; > - > - /* This VCPU is paused, but it's legal for a guest to read another > - * VCPU's kvmclock, so we really have to follow the specification where > - * it says that version is odd if data is being modified, and even after > - * it is consistent. > - * > - * Version field updates must be kept separate. This is because > - * kvm_write_guest_cached might use a "rep movs" instruction, and > - * writes within a string instruction are weakly ordered. So there > - * are three writes overall. > - * > - * As a small optimization, only write the version field in the first > - * and third write. The vcpu->pv_time cache is still valid, because the > - * version field is the first in the struct. > - */ > - BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0); > - > - vcpu->hv_clock.version = guest_hv_clock.version + 1; > - kvm_write_guest_cached(v->kvm, &vcpu->pv_time, > - &vcpu->hv_clock, > - sizeof(vcpu->hv_clock.version)); > - > - smp_wmb(); > - > - /* retain PVCLOCK_GUEST_STOPPED if set in guest copy */ > - pvclock_flags = (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED); > - > - if (vcpu->pvclock_set_guest_stopped_request) { > - pvclock_flags |= PVCLOCK_GUEST_STOPPED; > - vcpu->pvclock_set_guest_stopped_request = false; > - } > - > /* If the host uses TSC clocksource, then it is stable */ > + pvclock_flags = 0; > if (use_master_clock) > pvclock_flags |= PVCLOCK_TSC_STABLE_BIT; > > vcpu->hv_clock.flags = pvclock_flags; A nitpick: the above five lines can be collapsed into two: vcpu->hv_clock.flags = use_master_clock ? PVCLOCK_TSC_STABLE_BIT : 0; and pvclock_flags can be killed. > > - trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock); > - > - kvm_write_guest_cached(v->kvm, &vcpu->pv_time, > - &vcpu->hv_clock, > - sizeof(vcpu->hv_clock)); > - > - smp_wmb(); > + if (!vcpu->pv_time_enabled) > + return 0; > > - vcpu->hv_clock.version++; > - kvm_write_guest_cached(v->kvm, &vcpu->pv_time, > - &vcpu->hv_clock, > - sizeof(vcpu->hv_clock.version)); > + kvm_setup_pvclock_page(v); > return 0; > } > > -- > 1.8.3.1 > > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html