On 24/12/2015 10:33, Andrey Smetanin wrote:
> Lately tsc page was implemented but filled with empty
> values. This patch setup tsc page scale and offset based
> on vcpu tsc, tsc_khz and HV_X64_MSR_TIME_REF_COUNT value.
>
> The valid tsc page drops HV_X64_MSR_TIME_REF_COUNT msr
> reads count to zero which potentially improves performance.
>
> The patch applies on top of
> 'kvm: Make vcpu->requests as 64 bit bitmap'
> previously sent.
>
> Signed-off-by: Andrey Smetanin <asmetanin@xxxxxxxxxxxxx>
> CC: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> CC: Gleb Natapov <gleb@xxxxxxxxxx>
> CC: Roman Kagan <rkagan@xxxxxxxxxxxxx>
> CC: Denis V. Lunev <den@xxxxxxxxxx>
> CC: qemu-devel@xxxxxxxxxx

Actually there are some more issues:

- unless KVM can use a master clock, it is incorrect to set up the TSC
  page this way; the sequence needs to be 0xFFFFFFFF in that case

- writing the TSC page must be done while all VCPUs are stopped, because
  the TSC page doesn't provide the possibility for the guest to retry in
  the middle of an update (like seqcount in Linux does)
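To make these two points concrete, here is a rough userspace sketch of how a
guest consumes the reference TSC page. This is my own illustration, not code
from the patch or from the spec: the struct layout is simplified, the 20 MHz
frequency is made up, and I am taking 0xFFFFFFFF as the "page not valid, fall
back to HV_X64_MSR_TIME_REF_COUNT" marker mentioned above. The thing to notice
is that there is no "update in progress" state comparable to an odd seqcount
value, which is why the page can only be rewritten safely while no VCPU can
possibly be reading it.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for HV_REFERENCE_TSC_PAGE. */
struct ref_tsc_page {
        volatile uint32_t tsc_sequence;
        uint32_t reserved;
        volatile uint64_t tsc_scale;
        volatile uint64_t tsc_offset;
};

/*
 * Returns false when the page is marked invalid and the guest must fall
 * back to reading HV_X64_MSR_TIME_REF_COUNT instead.
 */
static bool read_ref_time(const struct ref_tsc_page *pg, uint64_t tsc,
                          uint64_t *ref_time_100ns)
{
        if (pg->tsc_sequence == 0xFFFFFFFFu)
                return false;

        /* reftime (100ns) = tsc * tsc_scale / 2^64 + tsc_offset */
        *ref_time_100ns = (uint64_t)(((unsigned __int128)tsc *
                                      pg->tsc_scale) >> 64) + pg->tsc_offset;
        return true;
}

int main(void)
{
        struct ref_tsc_page pg = {
                .tsc_sequence = 1,
                .tsc_scale    = 1ULL << 63,     /* 2^64 * 10000 / 20000 kHz */
                .tsc_offset   = 0,
        };
        uint64_t now;

        /* One second worth of ticks at the made-up 20 MHz rate -> 10^7. */
        if (read_ref_time(&pg, 20 * 1000 * 1000, &now))
                printf("reftime = %llu x 100ns\n", (unsigned long long)now);
        return 0;
}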
In the end, the TSC page is actually pretty similar to the kvmclock
master clock and it makes sense to build it on the master clock too.
I'll post a patch next week.

Paolo

> ---
>  arch/x86/kvm/hyperv.c    | 117 +++++++++++++++++++++++++++++++++++++++++------
>  arch/x86/kvm/hyperv.h    |   2 +
>  arch/x86/kvm/x86.c       |  12 +++++
>  include/linux/kvm_host.h |   1 +
>  4 files changed, 117 insertions(+), 15 deletions(-)
>
> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> index d50675a..504fdc7 100644
> --- a/arch/x86/kvm/hyperv.c
> +++ b/arch/x86/kvm/hyperv.c
> @@ -753,6 +753,105 @@ static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu,
>  	return 0;
>  }
>
> +static u64 calc_tsc_page_scale(u32 tsc_khz)
> +{
> +	/*
> +	 * reftime (in 100ns) = tsc * tsc_scale / 2^64 + tsc_offset
> +	 * so reftime_delta = (tsc_delta * tsc_scale) / 2^64
> +	 * so tsc_scale = (2^64 * reftime_delta)/tsc_delta
> +	 * so tsc_scale = (2^64 * 10 * 10^6) / tsc_hz = (2^64 * 10000) / tsc_khz
> +	 * so tsc_scale = (2^63 * 2 * 10000) / tsc_khz
> +	 */
> +	return mul_u64_u32_div(1ULL << 63, 2 * 10000, tsc_khz);
> +}
> +
> +static int write_tsc_page(struct kvm *kvm, u64 gfn,
> +			  PHV_REFERENCE_TSC_PAGE tsc_ref)
> +{
> +	if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
> +			    tsc_ref, sizeof(*tsc_ref)))
> +		return 1;
> +	mark_page_dirty(kvm, gfn);
> +	return 0;
> +}
> +
> +static int read_tsc_page(struct kvm *kvm, u64 gfn,
> +			 PHV_REFERENCE_TSC_PAGE tsc_ref)
> +{
> +	if (kvm_read_guest(kvm, gfn_to_gpa(gfn),
> +			   tsc_ref, sizeof(*tsc_ref)))
> +		return 1;
> +	return 0;
> +}
> +
> +static u64 calc_tsc_page_time(struct kvm_vcpu *vcpu,
> +			      PHV_REFERENCE_TSC_PAGE tsc_ref)
> +{
> +
> +	u64 tsc = kvm_read_l1_tsc(vcpu, rdtsc());
> +
> +	return mul_u64_u64_shr(tsc, tsc_ref->tsc_scale, 64)
> +		+ tsc_ref->tsc_offset;
> +}
> +
> +static int setup_blank_tsc_page(struct kvm_vcpu *vcpu, u64 gfn)
> +{
> +	HV_REFERENCE_TSC_PAGE tsc_ref;
> +
> +	memset(&tsc_ref, 0, sizeof(tsc_ref));
> +	return write_tsc_page(vcpu->kvm, gfn, &tsc_ref);
> +}
> +
> +int kvm_hv_setup_tsc_page(struct kvm_vcpu *vcpu)
> +{
> +	struct kvm *kvm = vcpu->kvm;
> +	struct kvm_hv *hv = &kvm->arch.hyperv;
> +	HV_REFERENCE_TSC_PAGE tsc_ref;
> +	u32 tsc_khz;
> +	int r;
> +	u64 gfn, ref_time, tsc_scale, tsc_offset, tsc;
> +
> +	if (WARN_ON_ONCE(!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)))
> +		return -EINVAL;
> +
> +	gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
> +	vcpu_debug(vcpu, "tsc page gfn 0x%llx\n", gfn);
> +
> +	tsc_khz = vcpu->arch.virtual_tsc_khz;
> +	if (!tsc_khz) {
> +		vcpu_unimpl(vcpu, "no tsc khz\n");
> +		return setup_blank_tsc_page(vcpu, gfn);
> +	}
> +
> +	r = read_tsc_page(kvm, gfn, &tsc_ref);
> +	if (r) {
> +		vcpu_err(vcpu, "can't access tsc page gfn 0x%llx\n", gfn);
> +		return r;
> +	}
> +
> +	tsc_scale = calc_tsc_page_scale(tsc_khz);
> +	ref_time = get_time_ref_counter(kvm);
> +	tsc = kvm_read_l1_tsc(vcpu, rdtsc());
> +
> +	/* tsc_offset = reftime - tsc * tsc_scale / 2^64 */
> +	tsc_offset = ref_time - mul_u64_u64_shr(tsc, tsc_scale, 64);
> +	vcpu_debug(vcpu, "tsc khz %u tsc %llu scale %llu offset %llu\n",
> +		   tsc_khz, tsc, tsc_scale, tsc_offset);
> +
> +	tsc_ref.tsc_sequence++;
> +	if (tsc_ref.tsc_sequence == 0)
> +		tsc_ref.tsc_sequence = 1;
> +
> +	tsc_ref.tsc_scale = tsc_scale;
> +	tsc_ref.tsc_offset = tsc_offset;
> +
> +	vcpu_debug(vcpu, "tsc page calibration time %llu vs. reftime %llu\n",
> +		   calc_tsc_page_time(vcpu, &tsc_ref),
> +		   get_time_ref_counter(kvm));
> +
> +	return write_tsc_page(kvm, gfn, &tsc_ref);
> +}
> +
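As a quick sanity check on the scale/offset math above (my own snippet, not
part of the patch; the 2.5 GHz figure is arbitrary, and it needs gcc/clang on
a 64-bit host for the 128-bit multiply), one second worth of TSC ticks does
come out as ~10^7 units of 100 ns:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint32_t tsc_khz = 2500000;     /* arbitrary 2.5 GHz TSC */

        /* tsc_scale = 2^64 * 10000 / tsc_khz, as derived in the comment above */
        uint64_t scale = (uint64_t)(((unsigned __int128)10000 << 64) / tsc_khz);

        /* One second of ticks should advance reftime by 10^7 * 100ns. */
        uint64_t tsc_delta = (uint64_t)tsc_khz * 1000;
        uint64_t ref_delta =
                (uint64_t)(((unsigned __int128)tsc_delta * scale) >> 64);

        printf("scale=%llu ref_delta=%llu\n",
               (unsigned long long)scale, (unsigned long long)ref_delta);

        /* Within one 100ns unit of 10^7; the slack comes from flooring scale. */
        assert(ref_delta >= 9999999 && ref_delta <= 10000000);
        return 0;
}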
>  static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
>  			     bool host)
>  {
> @@ -790,23 +889,11 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
>  		mark_page_dirty(kvm, gfn);
>  		break;
>  	}
> -	case HV_X64_MSR_REFERENCE_TSC: {
> -		u64 gfn;
> -		HV_REFERENCE_TSC_PAGE tsc_ref;
> -
> -		memset(&tsc_ref, 0, sizeof(tsc_ref));
> +	case HV_X64_MSR_REFERENCE_TSC:
>  		hv->hv_tsc_page = data;
> -		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
> -			break;
> -		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
> -		if (kvm_write_guest(
> -			kvm,
> -			gfn << HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT,
> -			&tsc_ref, sizeof(tsc_ref)))
> -			return 1;
> -		mark_page_dirty(kvm, gfn);
> +		if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)
> +			kvm_make_request(KVM_REQ_HV_TSC_PAGE, vcpu);
>  		break;
> -	}
>  	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
>  		return kvm_hv_msr_set_crash_data(vcpu,
>  						 msr - HV_X64_MSR_CRASH_P0,
> diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
> index 60eccd4..e7cc446 100644
> --- a/arch/x86/kvm/hyperv.h
> +++ b/arch/x86/kvm/hyperv.h
> @@ -84,4 +84,6 @@ static inline bool kvm_hv_has_stimer_pending(struct kvm_vcpu *vcpu)
>
>  void kvm_hv_process_stimers(struct kvm_vcpu *vcpu);
>
> +int kvm_hv_setup_tsc_page(struct kvm_vcpu *vcpu);
> +
>  #endif
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index aedb1a0..1d821f6 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -6500,6 +6500,18 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>  		 */
>  		if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu))
>  			kvm_hv_process_stimers(vcpu);
> +
> +		/*
> +		 * KVM_REQ_HV_TSC_PAGE setup should be after
> +		 * KVM_REQ_CLOCK_UPDATE processing because
> +		 * Hyper-V tsc page content depends
> +		 * on kvm->arch.kvmclock_offset value.
> +		 */
> +		if (kvm_check_request(KVM_REQ_HV_TSC_PAGE, vcpu)) {
> +			r = kvm_hv_setup_tsc_page(vcpu);
> +			if (unlikely(r))
> +				goto out;
> +		}
>  	}
>
>  	/*
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 6877b4d7e..93c9e25 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -145,6 +145,7 @@ static inline bool is_error_page(struct page *page)
>  #define KVM_REQ_HV_RESET	29
>  #define KVM_REQ_HV_EXIT		30
>  #define KVM_REQ_HV_STIMER	31
> +#define KVM_REQ_HV_TSC_PAGE	32
>
>  #define KVM_REQ_MAX 64
>
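Regarding the ordering comment in the x86.c hunk above: whatever ref_time
value is folded into tsc_offset moves one-for-one with
kvm->arch.kvmclock_offset, so the page has to be written only after a pending
clock update has been processed. A toy illustration of the skew follows; the
reference-counter formula here is an assumption for the sake of the example,
not the actual KVM code, and the numbers are made up:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical 100ns reference counter derived from a kvmclock-style offset. */
static uint64_t ref_time_100ns(int64_t host_ns, int64_t kvmclock_offset)
{
        return (uint64_t)(host_ns + kvmclock_offset) / 100;
}

int main(void)
{
        int64_t host_ns    = 123456789000LL;    /* arbitrary host clock reading */
        int64_t off_before = -1000000000LL;     /* offset before a clock update */
        int64_t off_after  = -1000050000LL;     /* offset after the update */

        /*
         * tsc_offset = ref_time - tsc * tsc_scale / 2^64; the TSC term is the
         * same either way, so the page ends up skewed by exactly the change
         * in ref_time if it is written before the clock update is processed.
         */
        uint64_t skew = ref_time_100ns(host_ns, off_before) -
                        ref_time_100ns(host_ns, off_after);

        printf("tsc page would be skewed by %llu x 100ns\n",
               (unsigned long long)skew);       /* 500 with these numbers */
        return 0;
}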