This patch adds the main host-side logic of lazy_tscdeadline. There are
three operations:

- UPDATE: when the guest writes the TSC-deadline MSR, update the 'armed'
  field of kvm_lazy_tscdeadline with the new deadline.

- KICK: when the hv timer or the sw timer (hrtimer) fires, check the
  'pending' field to decide whether to re-arm the timer or inject the
  local timer vector. The sw timer callback does not run in vcpu
  context, so a new KVM request (KVM_REQ_LAZY_TSCDEADLINE) is added to
  handle the kick in vcpu context.

- CLEAR: this is a bit tricky. The 'armed' field must be cleared
  properly, otherwise the guest OS can hang. The scenarios that need a
  CLEAR are:
  - switching between periodic/oneshot mode and tscdeadline mode
  - masking the lapic timer
  - the tscdeadline value has already expired before we arm the timer

A rough sketch of the guest-side fast path these operations pair with is
included after the diffstat below.

Here is the test result of netperf TCP_RR on loopback:

                       Close (lazy off)     Open (lazy on)
 --------------------------------------------------------------
 VM-Exit sum                  12617503            5815737
   intr                 0%       37023     0%       33002
   cpuid                0%           1     0%           0
   halt                19%     2503932    47%     2780683
   msr-write           79%    10046340    51%     2966824
   pause                0%          90     0%          84
   ept-violation        0%         584     0%         336
   ept-misconfig        0%           0     0%           2
   preemption-timer     0%       29518     0%       34800
 --------------------------------------------------------------
 MSR-Write sum                10046455            2966864
   apic-icr            25%     2533498    93%     2781235
   tsc-deadline        74%     7512945     6%      185629

With the feature enabled, the msr-write VM-exits (dominated by
tsc-deadline writes) drop from 10046340 to 2966824, a reduction of
roughly 70%.

Signed-off-by: Li Shujin <arkinjob@xxxxxxxxxxx>
Signed-off-by: Wang Jianchao <jianchwa@xxxxxxxxxxx>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/lapic.c            | 93 +++++++++++++++++++++++++++++++++++++----
 arch/x86/kvm/lapic.h            |  3 +-
 arch/x86/kvm/x86.c              |  3 ++
 4 files changed, 90 insertions(+), 10 deletions(-)
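[Note for reviewers: below is an illustrative sketch of the guest-side
fast path this host-side logic pairs with. The structure layout, the
helper name guest_set_tscdeadline() and the header choices are
assumptions made for illustration; the real guest code and the
definition of the shared structure live in other patches of this
series. Only the 'armed'/'pending' protocol itself comes from this
patch.]

#include <linux/compiler.h>     /* READ_ONCE()/WRITE_ONCE() */
#include <linux/types.h>
#include <asm/msr.h>            /* wrmsrl(), MSR_IA32_TSC_DEADLINE */

/*
 * Assumed layout of the area shared between guest and host:
 * 'armed' is the deadline the host has actually programmed,
 * 'pending' is the newest deadline the guest wants.
 */
struct kvm_lazy_tscdeadline {
        u64 armed;      /* written by host */
        u64 pending;    /* written by guest */
};

/* hypothetical guest-side helper, not part of this patch */
static void guest_set_tscdeadline(struct kvm_lazy_tscdeadline *lt, u64 tsc)
{
        u64 armed;

        WRITE_ONCE(lt->pending, tsc);

        /*
         * Fast path: an earlier deadline is still armed on the host, so
         * the host timer fires first and re-arms from 'pending' (the
         * KICK operation above); the MSR write and its VM-exit can be
         * skipped.
         */
        armed = READ_ONCE(lt->armed);
        if (armed && armed <= tsc)
                return;

        /* Slow path: nothing armed, or the new deadline is earlier. */
        wrmsrl(MSR_IA32_TSC_DEADLINE, tsc);
}

The point of the split is that the guest only pays for the MSR write
(and the VM-exit) when no earlier deadline is armed; otherwise the
host's KICK path re-arms the timer from 'pending' when the already
armed deadline fires.
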
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b036874..b217ae7 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -113,6 +113,7 @@
         KVM_ARCH_REQ_FLAGS(31, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_HV_TLB_FLUSH \
         KVM_ARCH_REQ_FLAGS(32, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_LAZY_TSCDEADLINE        KVM_ARCH_REQ(33)
 
 #define CR0_RESERVED_BITS \
         (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 71da41e..781516f 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1720,6 +1720,54 @@ void kvm_lazy_tscdeadline_exit(struct kvm_vcpu *vcpu)
         vcpu->arch.lazy_tscdeadline.guest = NULL;
 }
 
+static void kvm_lazy_tscdeadline_update(struct kvm_vcpu *vcpu, u64 tsc)
+{
+        struct kvm_host_lazy_tscdeadline *hlt = &vcpu->arch.lazy_tscdeadline;
+
+        if (!(hlt->msr_val & KVM_MSR_ENABLED) ||
+            !hlt->guest)
+                return;
+
+        hlt->guest->armed = tsc;
+        hlt->cached_armed = tsc;
+}
+
+bool kvm_lazy_tscdeadline_kick(struct kvm_vcpu *vcpu)
+{
+        struct kvm_host_lazy_tscdeadline *hlt = &vcpu->arch.lazy_tscdeadline;
+        u64 next;
+        bool ret = false;
+
+        if (!hlt->cached_armed ||
+            !(hlt->msr_val & KVM_MSR_ENABLED) ||
+            !hlt->guest)
+                return ret;
+
+        next = hlt->guest->pending;
+        if (next && next > hlt->guest->armed) {
+                kvm_set_lapic_tscdeadline_msr(vcpu, next);
+                ret = true;
+        } else {
+                hlt->guest->armed = 0;
+                hlt->cached_armed = 0;
+        }
+
+        return ret;
+}
+
+void kvm_lazy_tscdeadline_clear(struct kvm_vcpu *vcpu)
+{
+        struct kvm_host_lazy_tscdeadline *hlt = &vcpu->arch.lazy_tscdeadline;
+
+        if (!hlt->cached_armed ||
+            !(hlt->msr_val & KVM_MSR_ENABLED) ||
+            !hlt->guest)
+                return;
+
+        hlt->guest->armed = 0;
+        hlt->cached_armed = 0;
+}
+
 static void update_divide_count(struct kvm_lapic *apic)
 {
         u32 tmp1, tmp2, tdcr;
@@ -1765,8 +1813,12 @@ static void cancel_apic_timer(struct kvm_lapic *apic)
 
 static void apic_update_lvtt(struct kvm_lapic *apic)
 {
-        u32 timer_mode = kvm_lapic_get_reg(apic, APIC_LVTT) &
-                        apic->lapic_timer.timer_mode_mask;
+        u32 reg, timer_mode;
+        bool clear;
+
+        reg = kvm_lapic_get_reg(apic, APIC_LVTT);
+        clear = !!(reg & APIC_LVT_MASKED);
+        timer_mode = reg & apic->lapic_timer.timer_mode_mask;
 
         if (apic->lapic_timer.timer_mode != timer_mode) {
                 if (apic_lvtt_tscdeadline(apic) != (timer_mode ==
@@ -1775,10 +1827,14 @@ static void apic_update_lvtt(struct kvm_lapic *apic)
                         kvm_lapic_set_reg(apic, APIC_TMICT, 0);
                         apic->lapic_timer.period = 0;
                         apic->lapic_timer.tscdeadline = 0;
+                        clear = true;
                 }
                 apic->lapic_timer.timer_mode = timer_mode;
                 limit_periodic_timer_frequency(apic);
         }
+
+        if (clear)
+                kvm_lazy_tscdeadline_clear(apic->vcpu);
 }
 
 /*
@@ -1966,8 +2022,15 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)
                 expire = ktime_add_ns(now, ns);
                 expire = ktime_sub_ns(expire, ktimer->timer_advance_ns);
                 hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS_HARD);
-        } else
+        } else {
                 apic_timer_expired(apic, false);
+                /*
+                 * If the pending tscdeadline has already expired, we need to
+                 * clear the armed deadline, otherwise the guest will skip the
+                 * following MSR write and its clock event hangs.
+                 */
+                kvm_lazy_tscdeadline_clear(vcpu);
+        }
 
         local_irq_restore(flags);
 }
@@ -2145,6 +2208,9 @@ static bool start_hv_timer(struct kvm_lapic *apic)
                 }
         }
 
+        if (apic_lvtt_tscdeadline(apic) && expired)
+                kvm_lazy_tscdeadline_clear(vcpu);
+
         trace_kvm_hv_timer_state(vcpu->vcpu_id, ktimer->hv_timer_in_use);
 
         return true;
@@ -2189,8 +2255,12 @@ void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
         if (!apic->lapic_timer.hv_timer_in_use)
                 goto out;
         WARN_ON(kvm_vcpu_is_blocking(vcpu));
-        apic_timer_expired(apic, false);
-        cancel_hv_timer(apic);
+
+        if (!apic_lvtt_tscdeadline(apic) ||
+            !kvm_lazy_tscdeadline_kick(vcpu)) {
+                apic_timer_expired(apic, false);
+                cancel_hv_timer(apic);
+        }
 
         if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
                 advance_periodic_target_expiration(apic);
@@ -2522,6 +2592,7 @@ void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
 
         hrtimer_cancel(&apic->lapic_timer.timer);
         apic->lapic_timer.tscdeadline = data;
+        kvm_lazy_tscdeadline_update(vcpu, data);
         start_apic_timer(apic);
 }
 
@@ -2802,15 +2873,19 @@ static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
 {
         struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
         struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer);
+        enum hrtimer_restart ret = HRTIMER_NORESTART;
 
         apic_timer_expired(apic, true);
 
-        if (lapic_is_periodic(apic)) {
+        if (apic_lvtt_tscdeadline(apic)) {
+                kvm_make_request(KVM_REQ_LAZY_TSCDEADLINE, apic->vcpu);
+        } else if (lapic_is_periodic(apic)) {
                 advance_periodic_target_expiration(apic);
                 hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
-                return HRTIMER_RESTART;
-        } else
-                return HRTIMER_NORESTART;
+                ret = HRTIMER_RESTART;
+        }
+
+        return ret;
 }
 
 int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 51b9d5b..0387a02 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -280,5 +280,6 @@ static inline u8 kvm_xapic_id(struct kvm_lapic *apic)
 
 int kvm_lazy_tscdeadline_init(struct kvm_vcpu *vcpu);
 void kvm_lazy_tscdeadline_exit(struct kvm_vcpu *vcpu);
-
+void kvm_lazy_tscdeadline_clear(struct kvm_vcpu *vcpu);
+bool kvm_lazy_tscdeadline_kick(struct kvm_vcpu *vcpu);
 #endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7225fc9..26f0ef3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3879,6 +3879,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                         return 1;
 
                 if (!(data & KVM_MSR_ENABLED)) {
+                        kvm_lazy_tscdeadline_clear(vcpu);
                         kvm_lazy_tscdeadline_exit(vcpu);
                 } else {
                         kvm_lazy_tscdeadline_exit(vcpu);
@@ -10584,6 +10585,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                 }
                 if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
                         record_steal_time(vcpu);
+                if (kvm_check_request(KVM_REQ_LAZY_TSCDEADLINE, vcpu))
+                        kvm_lazy_tscdeadline_kick(vcpu);
 #ifdef CONFIG_KVM_SMM
                 if (kvm_check_request(KVM_REQ_SMI, vcpu))
                         process_smi(vcpu);
-- 
2.7.4