Re: [PATCH v1] kvm/x86: Hyper-V tsc page setup

"Denis V. Lunev" <den@xxxxxxxxxx> · Tue, 19 Jan 2016 10:48:11 +0300

On 12/24/2015 12:33 PM, Andrey Smetanin wrote:
Lately tsc page was implemented but filled with empty
values. This patch setup tsc page scale and offset based
on vcpu tsc, tsc_khz and  HV_X64_MSR_TIME_REF_COUNT value.

The valid tsc page drops HV_X64_MSR_TIME_REF_COUNT msr
reads count to zero which potentially improves performance.

The patch applies on top of
'kvm: Make vcpu->requests as 64 bit bitmap'
previously sent.

Signed-off-by: Andrey Smetanin <asmetanin@xxxxxxxxxxxxx>
CC: Paolo Bonzini <pbonzini@xxxxxxxxxx>
CC: Gleb Natapov <gleb@xxxxxxxxxx>
CC: Roman Kagan <rkagan@xxxxxxxxxxxxx>
CC: Denis V. Lunev <den@xxxxxxxxxx>
CC: qemu-devel@xxxxxxxxxx

---
  arch/x86/kvm/hyperv.c    | 117 +++++++++++++++++++++++++++++++++++++++++------
  arch/x86/kvm/hyperv.h    |   2 +
  arch/x86/kvm/x86.c       |  12 +++++
  include/linux/kvm_host.h |   1 +
  4 files changed, 117 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index d50675a..504fdc7 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -753,6 +753,105 @@ static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu,
  	return 0;
  }
  
+static u64 calc_tsc_page_scale(u32 tsc_khz)
+{
+	/*
+	 * reftime (in 100ns) = tsc * tsc_scale / 2^64 + tsc_offset
+	 * so reftime_delta = (tsc_delta * tsc_scale) / 2^64
+	 * so tsc_scale = (2^64 * reftime_delta)/tsc_delta
+	 * so tsc_scale = (2^64 * 10 * 10^6) / tsc_hz = (2^64 * 10000) / tsc_khz
+	 * so tsc_scale = (2^63 * 2 * 10000) / tsc_khz
+	 */
+	return mul_u64_u32_div(1ULL << 63, 2 * 10000, tsc_khz);
+}
+
+static int write_tsc_page(struct kvm *kvm, u64 gfn,
+			  PHV_REFERENCE_TSC_PAGE tsc_ref)
+{
+	if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
+			    tsc_ref, sizeof(*tsc_ref)))
+		return 1;
+	mark_page_dirty(kvm, gfn);
+	return 0;
+}
+
+static int read_tsc_page(struct kvm *kvm, u64 gfn,
+			 PHV_REFERENCE_TSC_PAGE tsc_ref)
+{
+	if (kvm_read_guest(kvm, gfn_to_gpa(gfn),
+			   tsc_ref, sizeof(*tsc_ref)))
+		return 1;
+	return 0;
+}
+
+static u64 calc_tsc_page_time(struct kvm_vcpu *vcpu,
+			      PHV_REFERENCE_TSC_PAGE tsc_ref)
+{
+
+	u64 tsc = kvm_read_l1_tsc(vcpu, rdtsc());
+
+	return mul_u64_u64_shr(tsc, tsc_ref->tsc_scale, 64)
+		+ tsc_ref->tsc_offset;
+}
+
+static int setup_blank_tsc_page(struct kvm_vcpu *vcpu, u64 gfn)
+{
+	HV_REFERENCE_TSC_PAGE tsc_ref;
+
+	memset(&tsc_ref, 0, sizeof(tsc_ref));
+	return write_tsc_page(vcpu->kvm, gfn, &tsc_ref);
+}
+
+int kvm_hv_setup_tsc_page(struct kvm_vcpu *vcpu)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_hv *hv = &kvm->arch.hyperv;
+	HV_REFERENCE_TSC_PAGE tsc_ref;
+	u32 tsc_khz;
+	int r;
+	u64 gfn, ref_time, tsc_scale, tsc_offset, tsc;
+
+	if (WARN_ON_ONCE(!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)))
+		return -EINVAL;
+
+	gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
+	vcpu_debug(vcpu, "tsc page gfn 0x%llx\n", gfn);
+
+	tsc_khz = vcpu->arch.virtual_tsc_khz;
+	if (!tsc_khz) {
+		vcpu_unimpl(vcpu, "no tsc khz\n");
+		return setup_blank_tsc_page(vcpu, gfn);
+	}
+
+	r = read_tsc_page(kvm, gfn, &tsc_ref);
+	if (r) {
+		vcpu_err(vcpu, "can't access tsc page gfn 0x%llx\n", gfn);
+		return r;
+	}
+
+	tsc_scale = calc_tsc_page_scale(tsc_khz);
+	ref_time = get_time_ref_counter(kvm);
+	tsc = kvm_read_l1_tsc(vcpu, rdtsc());
+
+	/* tsc_offset = reftime - tsc * tsc_scale / 2^64 */
+	tsc_offset = ref_time - mul_u64_u64_shr(tsc, tsc_scale, 64);
+	vcpu_debug(vcpu, "tsc khz %u tsc %llu scale %llu offset %llu\n",
+		   tsc_khz, tsc, tsc_scale, tsc_offset);
+
+	tsc_ref.tsc_sequence++;
+	if (tsc_ref.tsc_sequence == 0)
+		tsc_ref.tsc_sequence = 1;
+
+	tsc_ref.tsc_scale = tsc_scale;
+	tsc_ref.tsc_offset = tsc_offset;
+
+	vcpu_debug(vcpu, "tsc page calibration time %llu vs. reftime %llu\n",
+		   calc_tsc_page_time(vcpu, &tsc_ref),
+		   get_time_ref_counter(kvm));
+
+	return write_tsc_page(kvm, gfn, &tsc_ref);
+}
+
  static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
  			     bool host)
  {
@@ -790,23 +889,11 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
  		mark_page_dirty(kvm, gfn);
  		break;
  	}
-	case HV_X64_MSR_REFERENCE_TSC: {
-		u64 gfn;
-		HV_REFERENCE_TSC_PAGE tsc_ref;
-
-		memset(&tsc_ref, 0, sizeof(tsc_ref));
+	case HV_X64_MSR_REFERENCE_TSC:
  		hv->hv_tsc_page = data;
-		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
-			break;
-		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
-		if (kvm_write_guest(
-				kvm,
-				gfn << HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT,
-				&tsc_ref, sizeof(tsc_ref)))
-			return 1;
-		mark_page_dirty(kvm, gfn);
+		if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)
+			kvm_make_request(KVM_REQ_HV_TSC_PAGE, vcpu);
  		break;
-	}
  	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
  		return kvm_hv_msr_set_crash_data(vcpu,
  						 msr - HV_X64_MSR_CRASH_P0,
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
index 60eccd4..e7cc446 100644
--- a/arch/x86/kvm/hyperv.h
+++ b/arch/x86/kvm/hyperv.h
@@ -84,4 +84,6 @@ static inline bool kvm_hv_has_stimer_pending(struct kvm_vcpu *vcpu)
  
  void kvm_hv_process_stimers(struct kvm_vcpu *vcpu);
  
+int kvm_hv_setup_tsc_page(struct kvm_vcpu *vcpu);
+
  #endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index aedb1a0..1d821f6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6500,6 +6500,18 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
  		 */
  		if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu))
  			kvm_hv_process_stimers(vcpu);
+
+		/*
+		 * KVM_REQ_HV_TSC_PAGE setup should be after
+		 * KVM_REQ_CLOCK_UPDATE processing because
+		 * Hyper-V tsc page content depends
+		 * on kvm->arch.kvmclock_offset value.
+		 */
+		if (kvm_check_request(KVM_REQ_HV_TSC_PAGE, vcpu)) {
+			r = kvm_hv_setup_tsc_page(vcpu);
+			if (unlikely(r))
+				goto out;
+		}
  	}
  
  	/*
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 6877b4d7e..93c9e25 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -145,6 +145,7 @@ static inline bool is_error_page(struct page *page)
  #define KVM_REQ_HV_RESET          29
  #define KVM_REQ_HV_EXIT           30
  #define KVM_REQ_HV_STIMER         31
+#define KVM_REQ_HV_TSC_PAGE       32
  
  #define KVM_REQ_MAX               64
  
ping
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html