Lately tsc page was implemented but filled with empty values. This patch setup tsc page scale and offset based on vcpu tsc, tsc_khz and HV_X64_MSR_TIME_REF_COUNT value. The valid tsc page drops HV_X64_MSR_TIME_REF_COUNT msr reads count to zero which potentially improves performance. Signed-off-by: Andrey Smetanin <asmetanin@xxxxxxxxxxxxx> Reviewed-by: Peter Hornyack <peterhornyack@xxxxxxxxxx> CC: Paolo Bonzini <pbonzini@xxxxxxxxxx> CC: Radim Krčmář <rkrcmar@xxxxxxxxxx> CC: Roman Kagan <rkagan@xxxxxxxxxxxxx> CC: Denis V. Lunev <den@xxxxxxxxxx> [Computation of TSC page parameters rewritten to use the Linux timekeeper parameters. - Paolo] Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx> --- arch/x86/kvm/hyperv.c | 141 ++++++++++++++++++++++++++++++++++++++++++++------ arch/x86/kvm/hyperv.h | 3 ++ arch/x86/kvm/x86.c | 8 +-- 3 files changed, 133 insertions(+), 19 deletions(-) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index ed5b77f39ffb..e089d1f52dc0 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -756,6 +756,129 @@ static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu, return 0; } +/* + * The kvmclock and Hyper-V TSC page use similar formulas. Because the KVM + * one is more precise, it is a little more complex. However, converting + * between them is possible: + * + * kvmclock formula: + * nsec = (ticks - tsc_timestamp) * tsc_to_system_mul * 2^(tsc_shift-32) + * + system_time + * + * Hyper-V formula: + * nsec/100 = ticks * scale / 2^64 + offset + * + * When tsc_timestamp = system_time = 0, offset is zero in the Hyper-V formula. + * By dividing the kvmclock formula by 100 and equating what's left we get: + * ticks * scale / 2^64 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100 + * scale / 2^64 = tsc_to_system_mul * 2^(tsc_shift-32) / 100 + * scale = tsc_to_system_mul * 2^(32+tsc_shift) / 100 + * + * Now expand the kvmclock formula and divide by 100: + * nsec = ticks * tsc_to_system_mul * 2^(tsc_shift-32) + * - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32) + * + system_time + * nsec/100 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100 + * - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32) / 100 + * + system_time / 100 + * + * Replace tsc_to_system_mul * 2^(tsc_shift-32) / 100 by scale / 2^64: + * nsec/100 = ticks * scale / 2^64 + * - tsc_timestamp * scale / 2^64 + * + system_time / 100 + * + * Equate with the Hyper-V formula so that ticks * scale / 2^64 cancels out: + * offset = system_time / 100 - tsc_timestamp * scale / 2^64 + * + * These two equivalencies are implemented in this function. + */ +static bool compute_tsc_page_parameters(struct pvclock_vcpu_time_info *hv_clock, + HV_REFERENCE_TSC_PAGE *tsc_ref) +{ + u64 max_mul; + + if (!(hv_clock->flags & PVCLOCK_TSC_STABLE_BIT)) + return false; + + /* + * check if scale would overflow, if so we use the time ref counter + * tsc_to_system_mul * 2^(tsc_shift+32) / 100 >= 2^64 + * tsc_to_system_mul / 100 >= 2^(32-tsc_shift) + * tsc_to_system_mul >= 100 * 2^(32-tsc_shift) + */ + max_mul = 100ull << (32 - hv_clock->tsc_shift); + if (hv_clock->tsc_to_system_mul >= max_mul) + return false; + + /* + * Otherwise compute the scale and offset according to the formulas + * derived above. + */ + tsc_ref->tsc_scale = + mul_u64_u32_div(1ULL << (32 + hv_clock->tsc_shift), + hv_clock->tsc_to_system_mul, + 100); + + tsc_ref->tsc_offset = hv_clock->system_time; + do_div(tsc_ref->tsc_offset, 100); + tsc_ref->tsc_offset -= + mul_u64_u64_shr(hv_clock->tsc_timestamp, tsc_ref->tsc_scale, 64); + return true; +} + +void kvm_hv_setup_tsc_page(struct kvm *kvm, + struct pvclock_vcpu_time_info *hv_clock) +{ + struct kvm_hv *hv = &kvm->arch.hyperv; + HV_REFERENCE_TSC_PAGE tsc_ref = { 0 }; + u32 tsc_seq; + u64 gfn; + + BUILD_BUG_ON(sizeof(tsc_seq) != sizeof(tsc_ref.tsc_sequence)); + BUILD_BUG_ON(offsetof(HV_REFERENCE_TSC_PAGE, tsc_sequence) != 0); + + if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)) + return; + + gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; + /* + * Because the TSC parameters only vary when there is a + * change in the master clock, do not bother with caching. + */ + if (unlikely(kvm_read_guest(kvm, gfn_to_gpa(gfn), + &tsc_seq, sizeof(tsc_seq)))) + return; + + /* + * While we're computing and writing the parameters, force the + * guest to use the time reference count MSR. + */ + if (kvm_write_guest(kvm, gfn_to_gpa(gfn), + &tsc_ref, sizeof(tsc_ref.tsc_sequence))) + return; + + if (!compute_tsc_page_parameters(hv_clock, &tsc_ref)) + return; + + /* Ensure sequence is zero before writing the rest of the struct. */ + smp_wmb(); + if (kvm_write_guest(kvm, gfn_to_gpa(gfn), &tsc_ref, sizeof(tsc_ref))) + return; + + /* + * Now switch to the TSC page mechanism by writing the sequence. + */ + if (tsc_seq == 0xFFFFFFFF || tsc_seq == 0) + tsc_seq = 1; + else + tsc_seq++; + + /* Write the struct entirely before the non-zero sequence. */ + smp_wmb(); + + kvm_write_guest(kvm, gfn_to_gpa(gfn), &tsc_seq, sizeof(tsc_seq)); +} + static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) { @@ -793,23 +916,11 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data, mark_page_dirty(kvm, gfn); break; } - case HV_X64_MSR_REFERENCE_TSC: { - u64 gfn; - HV_REFERENCE_TSC_PAGE tsc_ref; - - memset(&tsc_ref, 0, sizeof(tsc_ref)); + case HV_X64_MSR_REFERENCE_TSC: hv->hv_tsc_page = data; - if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) - break; - gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; - if (kvm_write_guest( - kvm, - gfn << HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT, - &tsc_ref, sizeof(tsc_ref))) - return 1; - mark_page_dirty(kvm, gfn); + if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) + kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu); break; - } case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: return kvm_hv_msr_set_crash_data(vcpu, msr - HV_X64_MSR_CRASH_P0, diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h index 60eccd4bd1d3..cd1119538add 100644 --- a/arch/x86/kvm/hyperv.h +++ b/arch/x86/kvm/hyperv.h @@ -84,4 +84,7 @@ static inline bool kvm_hv_has_stimer_pending(struct kvm_vcpu *vcpu) void kvm_hv_process_stimers(struct kvm_vcpu *vcpu); +void kvm_hv_setup_tsc_page(struct kvm *kvm, + struct pvclock_vcpu_time_info *hv_clock); + #endif diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2edcfa054cbe..2e792bf80912 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1881,10 +1881,10 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) vcpu->hv_clock.flags = pvclock_flags; - if (!vcpu->pv_time_enabled) - return 0; - - kvm_setup_pvclock_page(v); + if (vcpu->pv_time_enabled) + kvm_setup_pvclock_page(v); + if (v == kvm_get_vcpu(v->kvm, 0)) + kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock); return 0; } -- 1.8.3.1 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html