On 05/03/19 09:03, Evgeny Yakovlev wrote: > We are seeing very rare test failures in apic/split-apic with the > following: "FAIL: TMCCT should be reset to the initial-count". We are > running on 4.14 kernels and this problem occurs sporadically although we > didn't try to reproduce it on any other stable kernel branch. > > It looks like under KVM lapic timer in periodic mode may return several > consecutive 0 values in apic_get_tmccr when hrtimer async callback is still > restarting the hrtimer and recalculating expiration_time value but > apic_get_tmccr is seeing ktime_now() already ahead of (still old) > expiration_time value. After a couple of 0-es from TMCCR everything > goes back to normal. > > This change forces test_apic_change_mode test to wait specifically for > lapic timer wrap around skipping zero TMCCR values in one case when we are > testing for timer in periodic mode to correctly reload TMCCR from TMICR. > If timer mode change is indeed broken and TMCCR is not reset then we > will see apic test timeout. > > Signed-off-by: Evgeny Yakovlev <wrfsh@xxxxxxxxxxxxxx> > --- > x86/apic.c | 23 +++++++++++++++++++++-- > 1 file changed, 21 insertions(+), 2 deletions(-) > > diff --git a/x86/apic.c b/x86/apic.c > index cfdbef2..51744cf 100644 > --- a/x86/apic.c > +++ b/x86/apic.c > @@ -489,7 +489,7 @@ static void test_physical_broadcast(void) > report("APIC physical broadcast shorthand", broadcast_received(ncpus)); > } > > -static void wait_until_tmcct_is_zero(uint32_t initial_count, bool stop_when_half) > +static void wait_until_tmcct_common(uint32_t initial_count, bool stop_when_half, bool should_wrap_around) > { > uint32_t tmcct = apic_read(APIC_TMCCT); > > @@ -503,9 +503,23 @@ static void wait_until_tmcct_is_zero(uint32_t initial_count, bool stop_when_half > /* Wait until the counter reach 0 or wrap-around */ > while ( tmcct <= (initial_count / 2) && tmcct > 0 ) > tmcct = apic_read(APIC_TMCCT); > + > + /* Wait specifically for wrap around to skip 0 TMCCR if we were asked to */ > + while (should_wrap_around && !tmcct) > + tmcct = apic_read(APIC_TMCCT); > } > } > > +static void wait_until_tmcct_is_zero(uint32_t initial_count, bool stop_when_half) > +{ > + return wait_until_tmcct_common(initial_count, stop_when_half, false); > +} > + > +static void wait_until_tmcct_wrap_around(uint32_t initial_count, bool stop_when_half) > +{ > + return wait_until_tmcct_common(initial_count, stop_when_half, true); > +} > + > static inline void apic_change_mode(unsigned long new_mode) > { > uint32_t lvtt; > @@ -548,7 +562,12 @@ static void test_apic_change_mode(void) > * counting down from where it was > */ > report("TMCCT should not be reset to TMICT value", apic_read(APIC_TMCCT) < (tmict / 2)); > - wait_until_tmcct_is_zero(tmict, false); > + /* > + * Specifically wait for timer wrap around and skip 0. > + * Under KVM lapic there is a possibility that a small amount of consecutive > + * TMCCR reads return 0 while hrtimer is reset in an async callback > + */ > + wait_until_tmcct_wrap_around(tmict, false); > report("TMCCT should be reset to the initial-count", apic_read(APIC_TMCCT) > (tmict / 2)); > > wait_until_tmcct_is_zero(tmict, true); > Applied, thanks. Paolo