Lately tsc page was implemented but filled with empty values. This patch setup tsc page scale and offset based on vcpu tsc, tsc_khz and HV_X64_MSR_TIME_REF_COUNT value. The valid tsc page drops HV_X64_MSR_TIME_REF_COUNT msr reads count to zero which potentially improves performance. The patch applies on top of 'kvm: Make vcpu->requests as 64 bit bitmap' previously sent. Signed-off-by: Andrey Smetanin <asmetanin@xxxxxxxxxxxxx> CC: Paolo Bonzini <pbonzini@xxxxxxxxxx> CC: Gleb Natapov <gleb@xxxxxxxxxx> CC: Roman Kagan <rkagan@xxxxxxxxxxxxx> CC: Denis V. Lunev <den@xxxxxxxxxx> CC: qemu-devel@xxxxxxxxxx --- arch/x86/kvm/hyperv.c | 117 +++++++++++++++++++++++++++++++++++++++++------ arch/x86/kvm/hyperv.h | 2 + arch/x86/kvm/x86.c | 12 +++++ include/linux/kvm_host.h | 1 + 4 files changed, 117 insertions(+), 15 deletions(-) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index d50675a..504fdc7 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -753,6 +753,105 @@ static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu, return 0; } +static u64 calc_tsc_page_scale(u32 tsc_khz) +{ + /* + * reftime (in 100ns) = tsc * tsc_scale / 2^64 + tsc_offset + * so reftime_delta = (tsc_delta * tsc_scale) / 2^64 + * so tsc_scale = (2^64 * reftime_delta)/tsc_delta + * so tsc_scale = (2^64 * 10 * 10^6) / tsc_hz = (2^64 * 10000) / tsc_khz + * so tsc_scale = (2^63 * 2 * 10000) / tsc_khz + */ + return mul_u64_u32_div(1ULL << 63, 2 * 10000, tsc_khz); +} + +static int write_tsc_page(struct kvm *kvm, u64 gfn, + PHV_REFERENCE_TSC_PAGE tsc_ref) +{ + if (kvm_write_guest(kvm, gfn_to_gpa(gfn), + tsc_ref, sizeof(*tsc_ref))) + return 1; + mark_page_dirty(kvm, gfn); + return 0; +} + +static int read_tsc_page(struct kvm *kvm, u64 gfn, + PHV_REFERENCE_TSC_PAGE tsc_ref) +{ + if (kvm_read_guest(kvm, gfn_to_gpa(gfn), + tsc_ref, sizeof(*tsc_ref))) + return 1; + return 0; +} + +static u64 calc_tsc_page_time(struct kvm_vcpu *vcpu, + PHV_REFERENCE_TSC_PAGE tsc_ref) +{ + + u64 tsc = kvm_read_l1_tsc(vcpu, rdtsc()); + + return mul_u64_u64_shr(tsc, tsc_ref->tsc_scale, 64) + + tsc_ref->tsc_offset; +} + +static int setup_blank_tsc_page(struct kvm_vcpu *vcpu, u64 gfn) +{ + HV_REFERENCE_TSC_PAGE tsc_ref; + + memset(&tsc_ref, 0, sizeof(tsc_ref)); + return write_tsc_page(vcpu->kvm, gfn, &tsc_ref); +} + +int kvm_hv_setup_tsc_page(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + struct kvm_hv *hv = &kvm->arch.hyperv; + HV_REFERENCE_TSC_PAGE tsc_ref; + u32 tsc_khz; + int r; + u64 gfn, ref_time, tsc_scale, tsc_offset, tsc; + + if (WARN_ON_ONCE(!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))) + return -EINVAL; + + gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; + vcpu_debug(vcpu, "tsc page gfn 0x%llx\n", gfn); + + tsc_khz = vcpu->arch.virtual_tsc_khz; + if (!tsc_khz) { + vcpu_unimpl(vcpu, "no tsc khz\n"); + return setup_blank_tsc_page(vcpu, gfn); + } + + r = read_tsc_page(kvm, gfn, &tsc_ref); + if (r) { + vcpu_err(vcpu, "can't access tsc page gfn 0x%llx\n", gfn); + return r; + } + + tsc_scale = calc_tsc_page_scale(tsc_khz); + ref_time = get_time_ref_counter(kvm); + tsc = kvm_read_l1_tsc(vcpu, rdtsc()); + + /* tsc_offset = reftime - tsc * tsc_scale / 2^64 */ + tsc_offset = ref_time - mul_u64_u64_shr(tsc, tsc_scale, 64); + vcpu_debug(vcpu, "tsc khz %u tsc %llu scale %llu offset %llu\n", + tsc_khz, tsc, tsc_scale, tsc_offset); + + tsc_ref.tsc_sequence++; + if (tsc_ref.tsc_sequence == 0) + tsc_ref.tsc_sequence = 1; + + tsc_ref.tsc_scale = tsc_scale; + tsc_ref.tsc_offset = tsc_offset; + + vcpu_debug(vcpu, "tsc page calibration time %llu vs. reftime %llu\n", + calc_tsc_page_time(vcpu, &tsc_ref), + get_time_ref_counter(kvm)); + + return write_tsc_page(kvm, gfn, &tsc_ref); +} + static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) { @@ -790,23 +889,11 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data, mark_page_dirty(kvm, gfn); break; } - case HV_X64_MSR_REFERENCE_TSC: { - u64 gfn; - HV_REFERENCE_TSC_PAGE tsc_ref; - - memset(&tsc_ref, 0, sizeof(tsc_ref)); + case HV_X64_MSR_REFERENCE_TSC: hv->hv_tsc_page = data; - if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) - break; - gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; - if (kvm_write_guest( - kvm, - gfn << HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT, - &tsc_ref, sizeof(tsc_ref))) - return 1; - mark_page_dirty(kvm, gfn); + if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) + kvm_make_request(KVM_REQ_HV_TSC_PAGE, vcpu); break; - } case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: return kvm_hv_msr_set_crash_data(vcpu, msr - HV_X64_MSR_CRASH_P0, diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h index 60eccd4..e7cc446 100644 --- a/arch/x86/kvm/hyperv.h +++ b/arch/x86/kvm/hyperv.h @@ -84,4 +84,6 @@ static inline bool kvm_hv_has_stimer_pending(struct kvm_vcpu *vcpu) void kvm_hv_process_stimers(struct kvm_vcpu *vcpu); +int kvm_hv_setup_tsc_page(struct kvm_vcpu *vcpu); + #endif diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index aedb1a0..1d821f6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6500,6 +6500,18 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) */ if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu)) kvm_hv_process_stimers(vcpu); + + /* + * KVM_REQ_HV_TSC_PAGE setup should be after + * KVM_REQ_CLOCK_UPDATE processing because + * Hyper-V tsc page content depends + * on kvm->arch.kvmclock_offset value. + */ + if (kvm_check_request(KVM_REQ_HV_TSC_PAGE, vcpu)) { + r = kvm_hv_setup_tsc_page(vcpu); + if (unlikely(r)) + goto out; + } } /* diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 6877b4d7e..93c9e25 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -145,6 +145,7 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_HV_RESET 29 #define KVM_REQ_HV_EXIT 30 #define KVM_REQ_HV_STIMER 31 +#define KVM_REQ_HV_TSC_PAGE 32 #define KVM_REQ_MAX 64 -- 2.4.3 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html