Re: [PATCH 1/4] KVM: x86: always fill in vcpu->arch.hv_clock

Roman Kagan <rkagan@xxxxxxxxxxxxx> · Fri, 2 Sep 2016 16:31:31 +0300

On Thu, Sep 01, 2016 at 05:26:12PM +0200, Paolo Bonzini wrote:
> We will use it in the next patches for KVM_GET_CLOCK and as a basis for the
> contents of the Hyper-V TSC page.  Get the values from the Linux
> timekeeper even if kvmclock is not enabled.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> ---
>  arch/x86/kvm/x86.c | 109 +++++++++++++++++++++++++++++------------------------
>  1 file changed, 59 insertions(+), 50 deletions(-)
> 
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 19f9f9e05c2a..65974dd0565f 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1716,6 +1716,60 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
>  #endif
>  }
>  
> +static void kvm_setup_pvclock_page(struct kvm_vcpu *v)
> +{
> +	struct kvm_vcpu_arch *vcpu = &v->arch;
> +	struct pvclock_vcpu_time_info guest_hv_clock;
> +
> +	if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time,
> +		&guest_hv_clock, sizeof(guest_hv_clock))))
> +		return;
> +
> +	/* This VCPU is paused, but it's legal for a guest to read another
> +	 * VCPU's kvmclock, so we really have to follow the specification where
> +	 * it says that version is odd if data is being modified, and even after
> +	 * it is consistent.
> +	 *
> +	 * Version field updates must be kept separate.  This is because
> +	 * kvm_write_guest_cached might use a "rep movs" instruction, and
> +	 * writes within a string instruction are weakly ordered.  So there
> +	 * are three writes overall.
> +	 *
> +	 * As a small optimization, only write the version field in the first
> +	 * and third write.  The vcpu->pv_time cache is still valid, because the
> +	 * version field is the first in the struct.
> +	 */
> +	BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
> +
> +	vcpu->hv_clock.version = guest_hv_clock.version + 1;
> +	kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
> +				&vcpu->hv_clock,
> +				sizeof(vcpu->hv_clock.version));
> +
> +	smp_wmb();
> +
> +	/* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
> +	vcpu->hv_clock.flags |= (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED);
> +
> +	if (vcpu->pvclock_set_guest_stopped_request) {
> +		vcpu->hv_clock.flags |= PVCLOCK_GUEST_STOPPED;
> +		vcpu->pvclock_set_guest_stopped_request = false;
> +	}
> +
> +	trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock);
> +
> +	kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
> +				&vcpu->hv_clock,
> +				sizeof(vcpu->hv_clock));
> +
> +	smp_wmb();
> +
> +	vcpu->hv_clock.version++;
> +	kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
> +				&vcpu->hv_clock,
> +				sizeof(vcpu->hv_clock.version));
> +}
> +
>  static int kvm_guest_time_update(struct kvm_vcpu *v)
>  {
>  	unsigned long flags, tgt_tsc_khz;
> @@ -1723,7 +1777,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
>  	struct kvm_arch *ka = &v->kvm->arch;
>  	s64 kernel_ns;
>  	u64 tsc_timestamp, host_tsc;
> -	struct pvclock_vcpu_time_info guest_hv_clock;
>  	u8 pvclock_flags;
>  	bool use_master_clock;
>  
> @@ -1777,8 +1830,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
>  
>  	local_irq_restore(flags);
>  
> -	if (!vcpu->pv_time_enabled)
> -		return 0;

Strictly speaking, you only need .hv_clock updated if either kvmclock or
tsc_ref_page is enabled, so you may want to still skip the calculations
otherwise.

> +	/* With all the info we got, fill in the values */
>  
>  	if (kvm_has_tsc_control)
>  		tgt_tsc_khz = kvm_scale_tsc(v, tgt_tsc_khz);
> @@ -1790,64 +1842,21 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
>  		vcpu->hw_tsc_khz = tgt_tsc_khz;
>  	}
>  
> -	/* With all the info we got, fill in the values */
>  	vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
>  	vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
>  	vcpu->last_guest_tsc = tsc_timestamp;
>  
> -	if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time,
> -		&guest_hv_clock, sizeof(guest_hv_clock))))
> -		return 0;
> -
> -	/* This VCPU is paused, but it's legal for a guest to read another
> -	 * VCPU's kvmclock, so we really have to follow the specification where
> -	 * it says that version is odd if data is being modified, and even after
> -	 * it is consistent.
> -	 *
> -	 * Version field updates must be kept separate.  This is because
> -	 * kvm_write_guest_cached might use a "rep movs" instruction, and
> -	 * writes within a string instruction are weakly ordered.  So there
> -	 * are three writes overall.
> -	 *
> -	 * As a small optimization, only write the version field in the first
> -	 * and third write.  The vcpu->pv_time cache is still valid, because the
> -	 * version field is the first in the struct.
> -	 */
> -	BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
> -
> -	vcpu->hv_clock.version = guest_hv_clock.version + 1;
> -	kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
> -				&vcpu->hv_clock,
> -				sizeof(vcpu->hv_clock.version));
> -
> -	smp_wmb();
> -
> -	/* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
> -	pvclock_flags = (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED);
> -
> -	if (vcpu->pvclock_set_guest_stopped_request) {
> -		pvclock_flags |= PVCLOCK_GUEST_STOPPED;
> -		vcpu->pvclock_set_guest_stopped_request = false;
> -	}
> -
>  	/* If the host uses TSC clocksource, then it is stable */
> +	pvclock_flags = 0;
>  	if (use_master_clock)
>  		pvclock_flags |= PVCLOCK_TSC_STABLE_BIT;
>  
>  	vcpu->hv_clock.flags = pvclock_flags;

A nitpick: the above five lines can be collapsed into two:

	vcpu->hv_clock.flags = use_master_clock ?
			       PVCLOCK_TSC_STABLE_BIT : 0;

and pvclock_flags can be killed.

>  
> -	trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock);
> -
> -	kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
> -				&vcpu->hv_clock,
> -				sizeof(vcpu->hv_clock));
> -
> -	smp_wmb();
> +	if (!vcpu->pv_time_enabled)
> +		return 0;
>  
> -	vcpu->hv_clock.version++;
> -	kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
> -				&vcpu->hv_clock,
> -				sizeof(vcpu->hv_clock.version));
> +	kvm_setup_pvclock_page(v);
>  	return 0;
>  }
>  
> -- 
> 1.8.3.1
> 
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html