> On 16 Apr 2019, at 21:44, Liran Alon <liran.alon@xxxxxxxxxx> wrote: > > > >> On 16 Apr 2019, at 20:48, Sean Christopherson <sean.j.christopherson@xxxxxxxxx> wrote: >> >> start_hv_timer() currently returns a bool, with %false interpreted as >> "failure" and %true as "success". But the reality is far more complex, >> as the %false is also returned when programming was successful, but >> must be canceled because there was already a pending timer interrupt. >> It can also return %false when programming was successful but the timer >> was marked expired because the deadline already passed, in which case >> the hv timer must also be canceled to avoid double injection. >> >> Functionally, the existing code is correct in the sense that it doesn't >> doing anything visibily wrong, e.g. generate spurious interrupts or miss >> an interrupt. But it's extremely confusing and inefficient, e.g. there >> are multiple extraneous calls to apic_timer_expired() that effectively >> get dropped due to @timer_pending being %true. >> >> Refactor start_hv_timer() to return an 'int' and move the handling of >> the various combinations of success vs. expiration vs. periodic up a >> level to restart_apic_timer(), which eliminates the needs to overload >> the meaning of %false. >> >> Cc: Wanpeng Li <wanpengli@xxxxxxxxxxx> >> Suggested-by: Liran Alon <liran.alon@xxxxxxxxxx> >> Suggested-by: Paolo Bonzini <pbonzini@xxxxxxxxxx> >> Signed-off-by: Sean Christopherson <sean.j.christopherson@xxxxxxxxx> >> --- >> arch/x86/kvm/lapic.c | 59 ++++++++++++++++++++++++++++++-------------- >> 1 file changed, 40 insertions(+), 19 deletions(-) >> >> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c >> index fa2dbca5ad6a..b867569107d5 100644 >> --- a/arch/x86/kvm/lapic.c >> +++ b/arch/x86/kvm/lapic.c >> @@ -1678,41 +1678,35 @@ static void cancel_hv_timer(struct kvm_lapic *apic) >> apic->lapic_timer.hv_timer_in_use = false; >> } >> >> -static bool start_hv_timer(struct kvm_lapic *apic) >> +/* >> + * Return: >> + * Negative errno on failure, >> + * '1' on success and the timer's deadline has passed, >> + * '0' on success and the timer's deadline is some time in the future. >> + */ >> +static int start_hv_timer(struct kvm_lapic *apic) >> { >> struct kvm_timer *ktimer = &apic->lapic_timer; >> int r; >> >> WARN_ON(preemptible()); >> if (!kvm_x86_ops->set_hv_timer) >> - return false; >> + return -ENOENT; >> >> if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending)) >> - return false; >> + return -EBUSY; >> >> if (!ktimer->tscdeadline) >> - return false; >> + return -EINVAL; > > These 2 upper “if” blocks can just be moved to start of restart_apic_timer() and early-return > in case any of them is true. > > In your code, start_hv_timer() will return r<0 which will cause restart_apic_timer() to call start_sw_timer() > which will then see one of these conditions and do nothing. > This is what I tried to warn about in the v1 email thread. > >> >> r = kvm_x86_ops->set_hv_timer(apic->vcpu, ktimer->tscdeadline); >> if (r < 0) >> - return false; >> + return r; >> >> ktimer->hv_timer_in_use = true; >> hrtimer_cancel(&ktimer->timer); >> >> - /* >> - * Also recheck ktimer->pending, in case the sw timer triggered in >> - * the window. For periodic timer, leave the hv timer running for >> - * simplicity, and the deadline will be recomputed on the next vmexit. >> - */ >> - if (!apic_lvtt_period(apic) && (r || atomic_read(&ktimer->pending))) { >> - if (r) >> - apic_timer_expired(apic); >> - return false; >> - } >> - >> - trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, true); >> - return true; >> + return r; >> } >> >> static void start_sw_timer(struct kvm_lapic *apic) >> @@ -1734,9 +1728,36 @@ static void start_sw_timer(struct kvm_lapic *apic) >> >> static void restart_apic_timer(struct kvm_lapic *apic) >> { >> + int r; >> + >> preempt_disable(); >> - if (!start_hv_timer(apic)) >> + r = start_hv_timer(apic); >> + if (r < 0) { >> start_sw_timer(apic); >> + } else if (!apic_lvtt_period(apic)) { >> + /* >> + * For a periodic timer, leave the timer running for simplicity >> + * so that the deadline will be recomputed on the next VM-Exit. >> + */ >> + if (atomic_read(&apic->lapic_timer.pending)) { >> + /* >> + * Cancel the hv timer if the sw timer fired while the >> + * hv timer was being programmed. >> + */ >> + cancel_hv_timer(apic); >> + } else if (r) { >> + /* >> + * start_hv_timer() returns '1' if the timer's deadline >> + * has already passed. Again, go through with the full >> + * programming in the periodic case as the VM-Exit is >> + * needed to recompute the periodic timer deadline. >> + */ >> + apic_timer_expired(apic); >> + cancel_hv_timer(apic); >> + } >> + } >> + trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, >> + apic->lapic_timer.hv_timer_in_use); >> preempt_enable(); >> } >> >> -- >> 2.21.0 >> > > I think I had a simpler patch. > I will try to send it and see what you think. > > -Liran Not tested, but something like: diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index fbd37e07e90d..151d88833cfe 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1531,7 +1531,7 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic) unsigned long flags; ktime_t now; - if (unlikely(!tscdeadline || !this_tsc_khz)) + if (unlikely(!this_tsc_khz)) return; local_irq_save(flags); @@ -1675,13 +1675,8 @@ static bool start_hv_timer(struct kvm_lapic *apic) int r; WARN_ON(preemptible()); - if (!kvm_x86_ops->set_hv_timer) - return false; - if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending)) - return false; - - if (!ktimer->tscdeadline) + if (!kvm_x86_ops->set_hv_timer) return false; r = kvm_x86_ops->set_hv_timer(apic->vcpu, ktimer->tscdeadline); @@ -1697,9 +1692,11 @@ static bool start_hv_timer(struct kvm_lapic *apic) * simplicity, and the deadline will be recomputed on the next vmexit. */ if (!apic_lvtt_period(apic) && (r || atomic_read(&ktimer->pending))) { - if (r) + if (r) { apic_timer_expired(apic); - return false; + ktimer->hv_timer_in_use = false; + } + return true; } trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, true); @@ -1708,13 +1705,9 @@ static bool start_hv_timer(struct kvm_lapic *apic) static void start_sw_timer(struct kvm_lapic *apic) { - struct kvm_timer *ktimer = &apic->lapic_timer; - WARN_ON(preemptible()); if (apic->lapic_timer.hv_timer_in_use) cancel_hv_timer(apic); - if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending)) - return; if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) start_sw_period(apic); @@ -1725,9 +1718,20 @@ static void start_sw_timer(struct kvm_lapic *apic) static void restart_apic_timer(struct kvm_lapic *apic) { + struct kvm_timer *ktimer = &apic->lapic_timer; + preempt_disable(); + + if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending)) + goto out; + + if (!ktimer->tscdeadline) + goto out; + if (!start_hv_timer(apic)) start_sw_timer(apic); + +out: preempt_enable(); } What do you think? -Liran