2017-06-28 22:30 GMT+08:00 Paolo Bonzini <pbonzini@xxxxxxxxxx>: > > > On 28/06/2017 16:27, Wanpeng Li wrote: >> 2017-06-28 20:10 GMT+08:00 Paolo Bonzini <pbonzini@xxxxxxxxxx>: >>> >>> >>> On 28/06/2017 03:29, Wanpeng Li wrote: >>>> u64 tscdeadline = apic->lapic_timer.tscdeadline; >>>> + int ret = 0; >>>> >>>> if ((atomic_read(&apic->lapic_timer.pending) && >>>> !apic_lvtt_period(apic)) || >>>> - kvm_x86_ops->set_hv_timer(apic->vcpu, tscdeadline)) { >>>> + (ret = kvm_x86_ops->set_hv_timer(apic->vcpu, tscdeadline))) { >>>> if (apic->lapic_timer.hv_timer_in_use) >>>> cancel_hv_timer(apic); >>>> + if (ret == 1) { >>>> + apic_timer_expired(apic); >>>> + return true; >>>> + } >>> >>> The preemption timer can also be used for modes other than TSC deadline. >>> >>> In periodic mode, your patch would miss a call to >>> advance_periodic_target_expiration, which is only called by >>> kvm_lapic_expired_hv_timer. >>> >>> You could use something like this: >>> >>> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c >>> index d24c8742d9b0..15b751aa7625 100644 >>> --- a/arch/x86/kvm/lapic.c >>> +++ b/arch/x86/kvm/lapic.c >>> @@ -1504,21 +1504,26 @@ static void cancel_hv_timer(struct kvm_lapic *apic) >>> static bool start_hv_timer(struct kvm_lapic *apic) >>> { >>> u64 tscdeadline = apic->lapic_timer.tscdeadline; >>> + bool need_cancel = apic->lapic_timer.hv_timer_in_use; >>> + if (!atomic_read(&apic->lapic_timer.pending) || apic_lvtt_period(apic)) { >>> + int r = kvm_x86_ops->set_hv_timer(apic->vcpu, tscdeadline); >>> + if (r >= 0) { >>> + need_cancel = false; >>> + apic->lapic_timer.hv_timer_in_use = true; >>> + hrtimer_cancel(&apic->lapic_timer.timer); >>> >>> - if ((atomic_read(&apic->lapic_timer.pending) && >>> - !apic_lvtt_period(apic)) || >>> - kvm_x86_ops->set_hv_timer(apic->vcpu, tscdeadline)) { >>> - if (apic->lapic_timer.hv_timer_in_use) >>> - cancel_hv_timer(apic); >>> - } else { >>> - apic->lapic_timer.hv_timer_in_use = true; >>> - hrtimer_cancel(&apic->lapic_timer.timer); >>> - >>> - /* In case the sw timer triggered in the window */ >>> - if (atomic_read(&apic->lapic_timer.pending) && >>> - !apic_lvtt_period(apic)) >>> - cancel_hv_timer(apic); >>> + /* In case the sw timer triggered in the window */ >>> + if (atomic_read(&apic->lapic_timer.pending) && >>> + !apic_lvtt_period(apic)) >>> + need_cancel = true; >>> + else if (r) >>> + kvm_lapic_expired_hv_timer(vcpu); >>> + } >>> } >>> + >>> + if (need_cancel) >>> + cancel_hv_timer(apic); >>> + >>> trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, >>> apic->lapic_timer.hv_timer_in_use); >>> return apic->lapic_timer.hv_timer_in_use; >>> >>> but I'm afraid of introducing a mutual recursion between >>> start_hv_timer and kvm_lapic_expired_hv_timer. >> >> We can just handle the apic timer oneshot/tscdeadline mode instead of >> periodic mode just like which is emulated by hrtimer to avoid the >> mutual recusion, what do you think? > > In that case, set_hv_timer should probably always enable the preemption > timer. You can then cancel it if it returns 1 _and_ the APIC timer's > mode is oneshot/tscdeadline. > > Paolo So how about something like this? diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index d24c874..900ce86 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1504,21 +1504,26 @@ static void cancel_hv_timer(struct kvm_lapic *apic) static bool start_hv_timer(struct kvm_lapic *apic) { u64 tscdeadline = apic->lapic_timer.tscdeadline; + bool need_cancel = apic->lapic_timer.hv_timer_in_use; + if (!atomic_read(&apic->lapic_timer.pending) || apic_lvtt_period(apic)) { + int r = kvm_x86_ops->set_hv_timer(apic->vcpu, tscdeadline); + if (r >= 0) { + need_cancel = false; + apic->lapic_timer.hv_timer_in_use = true; + hrtimer_cancel(&apic->lapic_timer.timer); + + /* In case the sw timer triggered in the window */ + if (atomic_read(&apic->lapic_timer.pending) && + !apic_lvtt_period(apic)) + need_cancel = true; + else if (r && (apic_lvtt_oneshot(apic) || apic_lvtt_tscdeadline(apic))) + apic_timer_expired(apic); + } + } - if ((atomic_read(&apic->lapic_timer.pending) && - !apic_lvtt_period(apic)) || - kvm_x86_ops->set_hv_timer(apic->vcpu, tscdeadline)) { - if (apic->lapic_timer.hv_timer_in_use) - cancel_hv_timer(apic); - } else { - apic->lapic_timer.hv_timer_in_use = true; - hrtimer_cancel(&apic->lapic_timer.timer); + if (need_cancel) + cancel_hv_timer(apic); - /* In case the sw timer triggered in the window */ - if (atomic_read(&apic->lapic_timer.pending) && - !apic_lvtt_period(apic)) - cancel_hv_timer(apic); - } trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, apic->lapic_timer.hv_timer_in_use); return apic->lapic_timer.hv_timer_in_use; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 4f616db..0a0e696 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -11125,6 +11125,9 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc) u64 guest_tscl = kvm_read_l1_tsc(vcpu, tscl); u64 delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl; + if (delta_tsc == 0) + return 1; + /* Convert to host delta tsc if tsc scaling is enabled */ if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio && u64_shl_div_u64(delta_tsc,