From: Wanpeng Li <wanpengli@xxxxxxxxxxx> Advancing the lapic timer tries to hide the hypervisor overhead between the moment the host-emulated timer fires and the moment the guest becomes aware that the timer has fired. However, even after further sustained optimizations, kvm-unit-tests/tscdeadline_latency still observes ~1000 cycles of latency, since we lose the time between the end of wait_lapic_expire and the guest becoming aware that the timer has fired. There is code between the end of wait_lapic_expire and the world switch; furthermore, the world switch itself also has overhead. In effect, waiting in wait_lapic_expire until guest_tsc equals the target deadline is too late: the guest will observe the latency between the end of wait_lapic_expire() and the completion of vmentry into the guest. This patch takes this time into consideration. The vmentry_lapic_timer_advance_ns module parameter should be well tuned by the host admin; it can reduce the average cyclictest latency from 3us to 2us on a Skylake server (guest w/ nohz=off, idle=poll; host w/ preemption_timer=N; in my testing the cyclictest latency is not very sensitive to this optimization when preemption_timer=Y), and kvm-unit-tests/tscdeadline_latency can reach 0. 
Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx> Cc: Radim Krčmář <rkrcmar@xxxxxxxxxx> Cc: Sean Christopherson <sean.j.christopherson@xxxxxxxxx> Signed-off-by: Wanpeng Li <wanpengli@xxxxxxxxxxx> --- arch/x86/kvm/lapic.c | 17 +++++++++++++++-- arch/x86/kvm/lapic.h | 1 + arch/x86/kvm/vmx/vmx.c | 2 +- arch/x86/kvm/x86.c | 3 +++ arch/x86/kvm/x86.h | 2 ++ 5 files changed, 22 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index fcf42a3..6f85221 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1531,6 +1531,19 @@ static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu, apic->lapic_timer.timer_advance_ns = timer_advance_ns; } +u64 get_vmentry_advance_delta(struct kvm_vcpu *vcpu) +{ + u64 vmentry_lapic_timer_advance_cycles = 0; + + if (vmentry_lapic_timer_advance_ns) { + vmentry_lapic_timer_advance_cycles = vmentry_lapic_timer_advance_ns * + vcpu->arch.virtual_tsc_khz; + do_div(vmentry_lapic_timer_advance_cycles, 1000000); + } + return vmentry_lapic_timer_advance_cycles; +} +EXPORT_SYMBOL_GPL(get_vmentry_advance_delta); + void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; @@ -1544,7 +1557,7 @@ void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu) tsc_deadline = apic->lapic_timer.expired_tscdeadline; apic->lapic_timer.expired_tscdeadline = 0; - guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); + guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()) + get_vmentry_advance_delta(vcpu); apic->lapic_timer.advance_expire_delta = guest_tsc - tsc_deadline; if (guest_tsc < tsc_deadline) @@ -1572,7 +1585,7 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic) local_irq_save(flags); now = ktime_get(); - guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); + guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()) + get_vmentry_advance_delta(vcpu); ns = (tscdeadline - guest_tsc) * 1000000ULL; do_div(ns, this_tsc_khz); diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index f974a3d..df2fe17 100644 --- 
a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -221,6 +221,7 @@ static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu) bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector); void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu); +u64 get_vmentry_advance_delta(struct kvm_vcpu *vcpu); bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq, struct kvm_vcpu **dest_vcpu); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index da24f18..0199ac3 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7047,7 +7047,7 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc, vmx = to_vmx(vcpu); tscl = rdtsc(); - guest_tscl = kvm_read_l1_tsc(vcpu, tscl); + guest_tscl = kvm_read_l1_tsc(vcpu, tscl) + get_vmentry_advance_delta(vcpu); delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl; lapic_timer_advance_cycles = nsec_to_cycles(vcpu, ktimer->timer_advance_ns); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a4eb711..a02e2c3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -145,6 +145,9 @@ module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR); static int __read_mostly lapic_timer_advance_ns = -1; module_param(lapic_timer_advance_ns, int, S_IRUGO | S_IWUSR); +u32 __read_mostly vmentry_lapic_timer_advance_ns = 0; +module_param(vmentry_lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR); + static bool __read_mostly vector_hashing = true; module_param(vector_hashing, bool, S_IRUGO); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 275b3b6..b0a3b84 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -294,6 +294,8 @@ extern u64 kvm_supported_xcr0(void); extern unsigned int min_timer_period_us; +extern unsigned int vmentry_lapic_timer_advance_ns; + extern bool enable_vmware_backdoor; extern struct static_key kvm_no_apic_vcpu; -- 2.7.4